Skip to content

Commit 053b60a

Browse files
authored
refactor: require callbacks at construction time to prevent leaky abstraction bugs (#570)
This refactor prevents the class of bug where Start/Reconnect could be called without callbacks configured, resulting in frozen output display. Changes: - Add ManagerCallbacks struct to ManagerOptions for construction-time callbacks - Add ErrManagerNotConfigured sentinel error returned by Start/StartWithResume/Reconnect - Add runtime guards that check the 'configured' flag before starting - Log warning when monitor-bell fails in Reconnect instead of silent ignore - Propagate configuration errors in ReconnectInstance instead of masking them - Deprecate Set*Callback methods (kept for backward compatibility) - Remove dead code: RecoverSession() and configureInstanceCallbacks() - Add comprehensive tests for guard behavior
1 parent 5d8c6dd commit 053b60a

File tree

4 files changed

+180
-120
lines changed

4 files changed

+180
-120
lines changed

CHANGELOG.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
### Changed
11+
12+
- **Instance Manager Callbacks Required at Construction** - Callbacks (OnStateChange, OnMetrics, OnTimeout, OnBell) are now passed via `ManagerCallbacks` struct in `ManagerOptions` at construction time, rather than being set separately via setter methods. This prevents the "leaky abstraction" bug where `Start()`/`Reconnect()` could be called without callbacks configured. The callback setter methods are now deprecated.
13+
14+
- **Runtime Guards on Instance Lifecycle Methods** - `Start()`, `StartWithResume()`, and `Reconnect()` now return `ErrManagerNotConfigured` if the Manager was not properly constructed via `NewManagerWithDeps`. This provides defense-in-depth against misconfigured managers.
15+
16+
- **Improved Error Handling** - The `Reconnect()` method now logs a warning if enabling monitor-bell fails, instead of silently ignoring the error. The `ReconnectInstance()` fallback logic now properly propagates configuration errors instead of masking them.
17+
18+
### Removed
19+
20+
- **Dead Code Cleanup** - Removed unused `RecoverSession()` method from orchestrator that had duplicate (and incomplete) callback configuration code, which was missing the metrics callback. Also removed the now-unused `configureInstanceCallbacks()` method since callbacks are configured at construction time.
21+
1022
## [0.12.5] - 2026-01-22
1123

1224
### Fixed

internal/instance/manager.go

Lines changed: 75 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ const (
3939
// At 100ms per tick, 50 ticks = 5 seconds between full refreshes.
4040
const fullRefreshInterval = 50
4141

42+
// ErrManagerNotConfigured is returned when Start, StartWithResume, or Reconnect
43+
// is called on a manager that was not properly constructed with NewManagerWithDeps.
44+
var ErrManagerNotConfigured = fmt.Errorf("manager not properly configured: use NewManagerWithDeps with Callbacks")
45+
4246
// pausedHeartbeatInterval is the number of ticks between session existence checks
4347
// when an instance is paused. This allows detecting completion of background instances
4448
// without the overhead of full capture. At 100ms per tick, 50 ticks = 5 seconds.
@@ -83,6 +87,23 @@ func DefaultManagerConfig() ManagerConfig {
8387
// MetricsChangeCallback is called when metrics are updated
8488
type MetricsChangeCallback func(instanceID string, metrics *metrics.ParsedMetrics)
8589

90+
// ManagerCallbacks holds all callbacks required for a properly configured Manager.
91+
// These must be provided at construction time to ensure the Manager can communicate
92+
// state changes, metrics, timeouts, and bells to the orchestrator.
93+
//
94+
// All callbacks are optional for testing purposes, but in production use at least
95+
// OnStateChange should be set to handle instance completion and waiting states.
96+
type ManagerCallbacks struct {
97+
// OnStateChange is called when the detected waiting state changes (completed, waiting input, etc.)
98+
OnStateChange StateChangeCallback
99+
// OnMetrics is called when token/cost metrics are updated
100+
OnMetrics MetricsChangeCallback
101+
// OnTimeout is called when activity, completion, or stale timeout is detected
102+
OnTimeout TimeoutCallback
103+
// OnBell is called when a terminal bell is detected
104+
OnBell BellCallback
105+
}
106+
86107
// ManagerOptions holds explicit dependencies for creating a Manager.
87108
// Use NewManagerWithDeps to create a Manager with these options.
88109
type ManagerOptions struct {
@@ -91,6 +112,7 @@ type ManagerOptions struct {
91112
WorkDir string
92113
Task string
93114
Config ManagerConfig
115+
Callbacks ManagerCallbacks // Callbacks for state changes, metrics, timeouts, bells
94116
StateMonitor *state.Monitor // Optional - if nil, an internal monitor is created
95117
LifecycleManager *lifecycle.Manager // Optional - if set, delegates Start/Stop/Reconnect
96118
ClaudeSessionID string // Optional - Claude's internal session UUID for resume capability
@@ -114,6 +136,11 @@ type Manager struct {
114136
captureTick *time.Ticker
115137
config ManagerConfig
116138

139+
// configured tracks whether the manager was properly constructed with callbacks.
140+
// This prevents the "leaky abstraction" bug where Start/Reconnect could be called
141+
// without callbacks configured, resulting in frozen output display.
142+
configured bool
143+
117144
// State detection - delegated to stateMonitor
118145
stateCallback StateChangeCallback
119146

@@ -211,7 +238,12 @@ func NewManagerWithDeps(opts ManagerOptions) *Manager {
211238
outputBuf: capture.NewRingBuffer(cfg.OutputBufferSize),
212239
doneChan: make(chan struct{}),
213240
config: cfg,
241+
configured: true, // Mark as properly constructed
242+
stateCallback: opts.Callbacks.OnStateChange,
214243
metricsParser: metrics.NewMetricsParser(),
244+
metricsCallback: opts.Callbacks.OnMetrics,
245+
timeoutCallback: opts.Callbacks.OnTimeout,
246+
bellCallback: opts.Callbacks.OnBell,
215247
inputHandler: input.NewHandler(
216248
input.WithPersistentSender(sessionName, socketName),
217249
input.WithBatching(sessionName, input.DefaultBatchConfig()),
@@ -221,28 +253,40 @@ func NewManagerWithDeps(opts ManagerOptions) *Manager {
221253
}
222254
}
223255

224-
// SetStateCallback sets a callback that will be invoked when the detected state changes
256+
// SetStateCallback sets a callback that will be invoked when the detected state changes.
257+
//
258+
// Deprecated: Use ManagerOptions.Callbacks.OnStateChange at construction time instead.
259+
// This setter exists only for backward compatibility and testing.
225260
func (m *Manager) SetStateCallback(cb StateChangeCallback) {
226261
m.mu.Lock()
227262
defer m.mu.Unlock()
228263
m.stateCallback = cb
229264
}
230265

231-
// SetMetricsCallback sets a callback that will be invoked when metrics are updated
266+
// SetMetricsCallback sets a callback that will be invoked when metrics are updated.
267+
//
268+
// Deprecated: Use ManagerOptions.Callbacks.OnMetrics at construction time instead.
269+
// This setter exists only for backward compatibility and testing.
232270
func (m *Manager) SetMetricsCallback(cb MetricsChangeCallback) {
233271
m.mu.Lock()
234272
defer m.mu.Unlock()
235273
m.metricsCallback = cb
236274
}
237275

238-
// SetTimeoutCallback sets a callback that will be invoked when a timeout is detected
276+
// SetTimeoutCallback sets a callback that will be invoked when a timeout is detected.
277+
//
278+
// Deprecated: Use ManagerOptions.Callbacks.OnTimeout at construction time instead.
279+
// This setter exists only for backward compatibility and testing.
239280
func (m *Manager) SetTimeoutCallback(cb TimeoutCallback) {
240281
m.mu.Lock()
241282
defer m.mu.Unlock()
242283
m.timeoutCallback = cb
243284
}
244285

245-
// SetBellCallback sets a callback that will be invoked when a terminal bell is detected
286+
// SetBellCallback sets a callback that will be invoked when a terminal bell is detected.
287+
//
288+
// Deprecated: Use ManagerOptions.Callbacks.OnBell at construction time instead.
289+
// This setter exists only for backward compatibility and testing.
246290
func (m *Manager) SetBellCallback(cb BellCallback) {
247291
m.mu.Lock()
248292
defer m.mu.Unlock()
@@ -307,7 +351,13 @@ func (m *Manager) ClearTimeout() {
307351

308352
// Start launches the Claude Code process in a tmux session.
309353
// If a LifecycleManager is configured, delegates to it for tmux session management.
354+
//
355+
// Returns ErrManagerNotConfigured if the manager was not created via NewManagerWithDeps.
310356
func (m *Manager) Start() error {
357+
if !m.configured {
358+
return ErrManagerNotConfigured
359+
}
360+
311361
// Delegate to lifecycle manager if available
312362
if m.lifecycleManager != nil {
313363
return m.lifecycleManager.Start(m)
@@ -429,7 +479,13 @@ func (m *Manager) Start() error {
429479
// StartWithResume launches Claude Code with --resume to continue a previous session.
430480
// This requires a valid ClaudeSessionID to be set (either via ManagerOptions or SetClaudeSessionID).
431481
// The resumed session will continue from where it left off.
482+
//
483+
// Returns ErrManagerNotConfigured if the manager was not created via NewManagerWithDeps.
432484
func (m *Manager) StartWithResume() error {
485+
if !m.configured {
486+
return ErrManagerNotConfigured
487+
}
488+
433489
// Delegate to lifecycle manager if available
434490
if m.lifecycleManager != nil {
435491
return m.lifecycleManager.Start(m)
@@ -1159,9 +1215,15 @@ func (m *Manager) TmuxSessionExists() bool {
11591215
return cmd.Run() == nil
11601216
}
11611217

1162-
// Reconnect attempts to reconnect to an existing tmux session
1163-
// This is used for session recovery after a restart
1218+
// Reconnect attempts to reconnect to an existing tmux session.
1219+
// This is used for session recovery after a restart.
1220+
//
1221+
// Returns ErrManagerNotConfigured if the manager was not created via NewManagerWithDeps.
11641222
func (m *Manager) Reconnect() error {
1223+
if !m.configured {
1224+
return ErrManagerNotConfigured
1225+
}
1226+
11651227
// Delegate to lifecycle manager if available
11661228
if m.lifecycleManager != nil {
11671229
return m.lifecycleManager.Reconnect(m)
@@ -1181,7 +1243,13 @@ func (m *Manager) Reconnect() error {
11811243

11821244
// Ensure monitor-bell is enabled for bell detection (may not be set if session was created before this feature)
11831245
ctx, cancel := context.WithTimeout(context.Background(), tmuxCommandTimeout)
1184-
_ = m.tmuxCmdCtx(ctx, "set-option", "-t", m.sessionName, "-w", "monitor-bell", "on").Run()
1246+
if err := m.tmuxCmdCtx(ctx, "set-option", "-t", m.sessionName, "-w", "monitor-bell", "on").Run(); err != nil {
1247+
if m.logger != nil {
1248+
m.logger.Warn("failed to enable monitor-bell on reconnect (bells may not be detected)",
1249+
"session_name", m.sessionName,
1250+
"error", err.Error())
1251+
}
1252+
}
11851253
cancel()
11861254

11871255
m.running = true

internal/instance/manager_test.go

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package instance
22

33
import (
4+
"errors"
45
"strings"
56
"testing"
67
"time"
@@ -281,6 +282,64 @@ func TestManager_Reconnect_NoSession(t *testing.T) {
281282
}
282283
}
283284

285+
// Tests for the "configured" guard that prevents leaky abstraction bugs.
286+
// These tests verify that Start/StartWithResume/Reconnect fail if the Manager
287+
// was not properly constructed via NewManagerWithDeps.
288+
289+
func TestManager_Start_ReturnsErrorIfNotConfigured(t *testing.T) {
290+
// Create an unconfigured manager (bypass NewManagerWithDeps)
291+
mgr := &Manager{
292+
id: "test",
293+
configured: false,
294+
}
295+
296+
err := mgr.Start()
297+
if err == nil {
298+
t.Fatal("Start() should return error for unconfigured manager")
299+
}
300+
if !errors.Is(err, ErrManagerNotConfigured) {
301+
t.Errorf("expected ErrManagerNotConfigured, got: %v", err)
302+
}
303+
}
304+
305+
func TestManager_StartWithResume_ReturnsErrorIfNotConfigured(t *testing.T) {
306+
mgr := &Manager{
307+
id: "test",
308+
configured: false,
309+
}
310+
311+
err := mgr.StartWithResume()
312+
if err == nil {
313+
t.Fatal("StartWithResume() should return error for unconfigured manager")
314+
}
315+
if !errors.Is(err, ErrManagerNotConfigured) {
316+
t.Errorf("expected ErrManagerNotConfigured, got: %v", err)
317+
}
318+
}
319+
320+
func TestManager_Reconnect_ReturnsErrorIfNotConfigured(t *testing.T) {
321+
mgr := &Manager{
322+
id: "test",
323+
configured: false,
324+
}
325+
326+
err := mgr.Reconnect()
327+
if err == nil {
328+
t.Fatal("Reconnect() should return error for unconfigured manager")
329+
}
330+
if !errors.Is(err, ErrManagerNotConfigured) {
331+
t.Errorf("expected ErrManagerNotConfigured, got: %v", err)
332+
}
333+
}
334+
335+
func TestManager_ProperlyConfiguredViaNewManagerWithDeps(t *testing.T) {
336+
// Verify that NewManagerWithDeps sets configured = true
337+
mgr := newTestManager("test", "/tmp", "task")
338+
if !mgr.configured {
339+
t.Error("NewManagerWithDeps should set configured = true")
340+
}
341+
}
342+
284343
func TestManager_Stop_NotRunning(t *testing.T) {
285344
mgr := newTestManager("test", "/tmp", "task")
286345

0 commit comments

Comments
 (0)