Skip to content

Commit 5ed5fff

Browse files
authored
feat(core): add modelAvailabilityService for managing and tracking model health (#13426)
1 parent d91ca38 commit 5ed5fff

File tree

2 files changed

+296
-0
lines changed

2 files changed

+296
-0
lines changed
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
/**
2+
* @license
3+
* Copyright 2025 Google LLC
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
import { describe, expect, it, vi, beforeEach } from 'vitest';
8+
import { ModelAvailabilityService } from './modelAvailabilityService.js';
9+
10+
describe('ModelAvailabilityService', () => {
  const model = 'test-model';
  let service: ModelAvailabilityService;

  // Shorthand for the `available` flag of a model's current snapshot.
  const isAvailable = (id: string) => service.snapshot(id).available;

  // Each test gets its own service instance; vitest is also asked to use
  // real timers before every test.
  beforeEach(() => {
    service = new ModelAvailabilityService();
    vi.useRealTimers();
  });

  it('returns available snapshot when no state recorded', () => {
    const snapshot = service.snapshot(model);
    expect(snapshot).toEqual({ available: true });
  });

  it('tracks retry-once-per-turn failures', () => {
    const consumedSnapshot = {
      available: false,
      reason: 'retry_once_per_turn',
    };

    // Marked but not yet consumed: still selectable.
    service.markRetryOncePerTurn(model);
    expect(service.snapshot(model)).toEqual({ available: true });

    // Consuming the attempt makes the model unavailable for this turn.
    service.consumeStickyAttempt(model);
    expect(service.snapshot(model)).toEqual(consumedSnapshot);

    // A new turn makes it available again.
    service.resetTurn();
    expect(service.snapshot(model)).toEqual({ available: true });
  });

  it('tracks terminal failures', () => {
    service.markTerminal(model, 'quota');

    const snapshot = service.snapshot(model);
    expect(snapshot).toEqual({ available: false, reason: 'quota' });
  });

  it('does not override terminal failure with sticky failure', () => {
    service.markTerminal(model, 'quota');
    service.markRetryOncePerTurn(model);

    const snapshot = service.snapshot(model);
    expect(snapshot).toEqual({ available: false, reason: 'quota' });
  });

  it('selects models respecting terminal and sticky states', () => {
    const stickyModel = 'stick-model';
    const healthyModel = 'healthy-model';
    const candidates = [model, stickyModel, healthyModel];
    const capacitySkip = { model, reason: 'capacity' };

    service.markTerminal(model, 'capacity');
    service.markRetryOncePerTurn(stickyModel);

    // The terminal model is skipped; the unconsumed sticky model is picked.
    const first = service.selectFirstAvailable(candidates);
    expect(first).toEqual({
      selected: stickyModel,
      attempts: 1,
      skipped: [capacitySkip],
    });

    // Once the sticky attempt is consumed, selection falls through to the
    // healthy model and both earlier candidates are reported as skipped.
    service.consumeStickyAttempt(stickyModel);
    const second = service.selectFirstAvailable(candidates);
    expect(second).toEqual({
      selected: healthyModel,
      skipped: [
        capacitySkip,
        { model: stickyModel, reason: 'retry_once_per_turn' },
      ],
    });

    // After a turn reset the sticky model is selectable again.
    service.resetTurn();
    const third = service.selectFirstAvailable(candidates);
    expect(third).toEqual({
      selected: stickyModel,
      attempts: 1,
      skipped: [capacitySkip],
    });
  });

  it('preserves consumed state when marking retry-once-per-turn again', () => {
    service.markRetryOncePerTurn(model);
    service.consumeStickyAttempt(model);

    // The attempt has been consumed at this point.
    expect(isAvailable(model)).toBe(false);

    // Re-marking within the same turn must keep the consumed flag.
    service.markRetryOncePerTurn(model);
    expect(isAvailable(model)).toBe(false);
  });

  it('clears consumed state when marked healthy', () => {
    service.markRetryOncePerTurn(model);
    service.consumeStickyAttempt(model);
    expect(isAvailable(model)).toBe(false);

    service.markHealthy(model);
    expect(isAvailable(model)).toBe(true);

    // Marking it sticky after a healthy reset starts from an unconsumed state.
    service.markRetryOncePerTurn(model);
    expect(isAvailable(model)).toBe(true);
  });

  it('resetTurn resets consumed state for multiple sticky models', () => {
    const model2 = 'model-2';

    service.markRetryOncePerTurn(model);
    service.markRetryOncePerTurn(model2);

    service.consumeStickyAttempt(model);
    service.consumeStickyAttempt(model2);

    expect(isAvailable(model)).toBe(false);
    expect(isAvailable(model2)).toBe(false);

    service.resetTurn();

    expect(isAvailable(model)).toBe(true);
    expect(isAvailable(model2)).toBe(true);
  });

  it('resetTurn does not affect terminal models', () => {
    service.markTerminal(model, 'quota');
    service.resetTurn();

    const snapshot = service.snapshot(model);
    expect(snapshot).toEqual({ available: false, reason: 'quota' });
  });
});
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
/**
2+
* @license
3+
* Copyright 2025 Google LLC
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
/** Identifier used to key a model's health state. */
export type ModelId = string;

/**
 * Reasons recorded by `markTerminal`. A terminal state is not cleared by
 * `resetTurn()`; it stays until the model is marked healthy (or re-marked).
 */
export type TerminalUnavailabilityReason = 'quota' | 'capacity';

/**
 * Reason recorded by `markRetryOncePerTurn`. It only makes the model
 * unavailable once its attempt has been consumed, and `resetTurn()` lifts it.
 */
export type TurnUnavailabilityReason = 'retry_once_per_turn';

/**
 * Every reason that can appear in a snapshot or a skipped entry. `'unknown'`
 * is the fallback used by `selectFirstAvailable` when an unavailable snapshot
 * carries no reason.
 */
export type UnavailabilityReason =
  | TerminalUnavailabilityReason
  | TurnUnavailabilityReason
  | 'unknown';

/**
 * Internal per-model record. A model with no record is healthy.
 *
 * For `sticky_retry`, `consumed` tracks whether the attempt for the current
 * turn has been used: it is set by `consumeStickyAttempt` and cleared by
 * `resetTurn`.
 */
type HealthState =
  | { status: 'terminal'; reason: TerminalUnavailabilityReason }
  | {
      status: 'sticky_retry';
      reason: TurnUnavailabilityReason;
      consumed: boolean;
    };

/** Point-in-time availability of one model. `reason` is set only when unavailable. */
export interface ModelAvailabilitySnapshot {
  available: boolean;
  reason?: UnavailabilityReason;
}

/** Outcome of `selectFirstAvailable`. */
export interface ModelSelectionResult {
  /** First available model in the candidate list, or `null` if none was. */
  selected: ModelId | null;
  /** Present (as `1`) only when the selected model is in the sticky-retry state. */
  attempts?: number;
  /** Candidates passed over before the selection, in input order. */
  skipped: Array<{
    model: ModelId;
    reason: UnavailabilityReason;
  }>;
}

/**
 * In-memory tracker of model health.
 *
 * Models are healthy by default. They can be marked terminally unavailable,
 * or marked "retry once per turn", in which case they remain selectable until
 * the attempt is consumed and become selectable again after `resetTurn()`.
 */
export class ModelAvailabilityService {
  private readonly health = new Map<ModelId, HealthState>();

  /**
   * Records a terminal failure for `model`, replacing any existing state.
   *
   * @param model - Model to mark.
   * @param reason - Why the model is unavailable.
   */
  markTerminal(model: ModelId, reason: TerminalUnavailabilityReason): void {
    this.setState(model, { status: 'terminal', reason });
  }

  /**
   * Removes any recorded state for `model`, making it available again.
   *
   * @param model - Model to clear.
   */
  markHealthy(model: ModelId): void {
    this.clearState(model);
  }

  /**
   * Puts `model` into the sticky-retry state.
   *
   * A terminal state is left untouched. If the model is already in the
   * sticky-retry state its `consumed` flag is preserved, so a model that
   * fails repeatedly within one turn cannot be retried indefinitely.
   *
   * @param model - Model to mark.
   */
  markRetryOncePerTurn(model: ModelId): void {
    const current = this.health.get(model);

    // Do not override a terminal failure with a transient one.
    if (current?.status === 'terminal') {
      return;
    }

    this.setState(model, {
      status: 'sticky_retry',
      reason: 'retry_once_per_turn',
      // Only a fresh entry starts unconsumed; re-marking keeps the flag.
      consumed: current?.status === 'sticky_retry' ? current.consumed : false,
    });
  }

  /**
   * Marks the sticky-retry attempt of `model` as used for the current turn.
   * Does nothing unless the model is in the sticky-retry state.
   *
   * @param model - Model whose attempt was used.
   */
  consumeStickyAttempt(model: ModelId): void {
    const current = this.health.get(model);
    if (current?.status === 'sticky_retry') {
      this.setState(model, { ...current, consumed: true });
    }
  }

  /**
   * Reports whether `model` is currently available.
   *
   * @param model - Model to inspect.
   * @returns `{ available: true }` when no state is recorded or the sticky
   *   attempt is still unconsumed; otherwise `{ available: false, reason }`.
   */
  snapshot(model: ModelId): ModelAvailabilitySnapshot {
    const state = this.health.get(model);

    if (!state) {
      return { available: true };
    }

    // Terminal states are always unavailable; sticky ones only once consumed.
    if (state.status === 'terminal' || state.consumed) {
      return { available: false, reason: state.reason };
    }

    return { available: true };
  }

  /**
   * Picks the first available model from `models`, in order.
   *
   * The input is not mutated. The `attempts` key is only present on the
   * result when the selected model is in the sticky-retry state.
   *
   * @param models - Candidate models in priority order.
   * @returns The selection together with every candidate skipped before it;
   *   `selected` is `null` when no candidate is available.
   */
  selectFirstAvailable(models: readonly ModelId[]): ModelSelectionResult {
    const skipped: ModelSelectionResult['skipped'] = [];

    for (const model of models) {
      const snapshot = this.snapshot(model);

      if (!snapshot.available) {
        skipped.push({ model, reason: snapshot.reason ?? 'unknown' });
        continue;
      }

      // A sticky model is being attempted, so note that.
      if (this.health.get(model)?.status === 'sticky_retry') {
        return { selected: model, attempts: 1, skipped };
      }
      return { selected: model, skipped };
    }

    return { selected: null, skipped };
  }

  /**
   * Starts a new turn: every sticky-retry model gets its attempt back.
   * Terminal states are not affected.
   */
  resetTurn(): void {
    // Only values of existing keys are replaced, so iterating while writing
    // does not add entries to the iteration.
    for (const [model, state] of this.health.entries()) {
      if (state.status === 'sticky_retry') {
        this.setState(model, { ...state, consumed: false });
      }
    }
  }

  /** Stores `nextState` as the current state of `model`. */
  private setState(model: ModelId, nextState: HealthState): void {
    this.health.set(model, nextState);
  }

  /** Drops any stored state of `model`. */
  private clearState(model: ModelId): void {
    this.health.delete(model);
  }
}

0 commit comments

Comments
 (0)