Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions internal/pkg/agent/application/application.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,22 @@
return nil, nil, nil, errors.New(err, "failed to initialize composable controller")
}

<<<<<<< HEAD

Check failure on line 243 in internal/pkg/agent/application/application.go

View workflow job for this annotation

GitHub Actions / lint (ubuntu-latest)

expected statement, found '<<' (typecheck)
otelManager := otelmanager.NewOTelManager(log.Named("otel_manager"), baseLogger)
=======
otelManager, err := otelmanager.NewOTelManager(
log.Named("otel_manager"),
logLevel, baseLogger,
otelExecMode,
agentInfo,
cfg.Settings.Collector,
monitor.ComponentMonitoringConfig,
otelmanager.CollectorStopTimeout,
)
if err != nil {
return nil, nil, nil, fmt.Errorf("failed to create otel manager: %w", err)
}
>>>>>>> 9c001b07d (fix: zombie processes during restart (#10650))

Check failure on line 258 in internal/pkg/agent/application/application.go

View workflow job for this annotation

GitHub Actions / lint (ubuntu-latest)

expected statement, found '>>' (typecheck)
coord := coordinator.New(log, cfg, logLevel, agentInfo, specs, reexec, upgrader, runtime, configMgr, varsManager, caps, monitor, isManaged, otelManager, actionAcker, initialUpgradeDetails, compModifiers...)
if managed != nil {
// the coordinator requires the config manager as well as in managed-mode the config manager requires the
Expand Down
5 changes: 5 additions & 0 deletions internal/pkg/agent/application/coordinator/coordinator.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,11 @@ type ComponentsModifier func(comps []component.Component, cfg map[string]interfa

// managerShutdownTimeout bounds how long the coordinator waits, while shutting
// down, for its managers to report their termination states.
//
// Note: at 5s this is shorter than the default stop timeout for subprocess
// components (30s, from process.DefaultConfig()). The coordinator may therefore
// stop waiting before the subprocesses have finished terminating, in which case
// Wait() is never called on them. This results in zombie processes during
// restart on Unix systems.
const managerShutdownTimeout = 5 * time.Second

type configReloader interface {
Expand Down
2 changes: 2 additions & 0 deletions internal/pkg/agent/application/reexec/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,10 @@ func NewManager(log *logger.Logger, exec string) ExecManager {

func (m *manager) ReExec(shutdownCallback ShutdownCallbackFn, argOverrides ...string) {
go func() {
m.logger.Debug("Triggering manager shutdown")
close(m.trigger)
<-m.shutdown
m.logger.Debug("Manager shutdown complete")

if shutdownCallback != nil {
if err := shutdownCallback(); err != nil {
Expand Down
31 changes: 31 additions & 0 deletions internal/pkg/otel/manager/execution.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License 2.0;
// you may not use this file except in compliance with the Elastic License 2.0.

package manager

import (
"context"
"time"

"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/status"
"go.opentelemetry.io/collector/confmap"

"github.com/elastic/elastic-agent/pkg/core/logger"
)

// collectorExecution is implemented by anything that can start an otel
// collector and hand back a collectorHandle for stopping it.
type collectorExecution interface {
// startCollector starts the otel collector with the given arguments and returns a handle that allows it to be
// stopped.
//
// Cancelling ctx stops all goroutines involved in the execution.
//
// While running, the collector reports status events on statusCh and errors on errCh. Both are written in a
// non-blocking fashion: the channel is drained before each write.
//
// After the collector exits, it emits an error describing the exit status (nil if successful) and a nil status.
startCollector(ctx context.Context, logger *logger.Logger, cfg *confmap.Conf, errCh chan error, statusCh chan *status.AggregateStatus) (collectorHandle, error)
}

// collectorHandle is the value returned by collectorExecution.startCollector;
// it is used to stop the collector that was started.
type collectorHandle interface {
// Stop stops the collector and waits up to waitTime for it to exit gracefully. If the collector has not exited
// within that time, it is killed, and Stop then waits one extra second to ensure the collector has really
// stopped.
Stop(waitTime time.Duration)
}
Loading
Loading