Skip to content

Commit c311669

Browse files
authored
fix(kiloclaw): fix outdated-controller error handling in Kilo CLI recovery dialog (#1835)
* fix(kiloclaw): propagate controller_route_unavailable through DO RPC boundary

The GatewayControllerError.code property was lost when crossing the DO RPC serialization boundary, causing startKiloCliRun to return a generic 500 instead of the controller_route_unavailable code the client needs to show the redeploy UI. Follow the established pattern: catch isErrorUnknownRoute in the DO and return null, then handle null in the platform route to emit the proper error code in the HTTP response body.

* fix(kiloclaw): keep recovery dialog open during redeploy and block until machine ready

After dispatching a redeploy, the dialog now stays open and transitions to a waiting state with a restart banner. The prompt textarea and Run Recovery button are disabled until the machine status polls back as running. Stale needs-redeploy errors are cleared via useEffect when machineStatus changes, so reopening the dialog after a redeploy shows the correct state.
1 parent a66d7f8 commit c311669

File tree

5 files changed

+192
-65
lines changed

5 files changed

+192
-65
lines changed

kiloclaw/src/durable-objects/kiloclaw-instance/kilo-cli-run.ts

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import {
44
KiloCliRunStatusResponseSchema,
55
GatewayCommandResponseSchema,
66
} from '../gateway-controller-types';
7-
import { callGatewayController } from './gateway';
7+
import { callGatewayController, isErrorUnknownRoute } from './gateway';
88
import type { InstanceMutableState } from './types';
99

1010
type KiloCliRunStartResponse = {
@@ -29,19 +29,24 @@ export async function startKiloCliRun(
2929
state: InstanceMutableState,
3030
env: KiloClawEnv,
3131
prompt: string
32-
): Promise<KiloCliRunStartResponse> {
32+
): Promise<KiloCliRunStartResponse | null> {
3333
if (state.status !== 'running' || !state.flyMachineId) {
3434
throw Object.assign(new Error('Instance is not running'), { status: 409 });
3535
}
3636

37-
return callGatewayController(
38-
state,
39-
env,
40-
'/_kilo/cli-run/start',
41-
'POST',
42-
KiloCliRunStartResponseSchema,
43-
{ prompt }
44-
);
37+
try {
38+
return await callGatewayController(
39+
state,
40+
env,
41+
'/_kilo/cli-run/start',
42+
'POST',
43+
KiloCliRunStartResponseSchema,
44+
{ prompt }
45+
);
46+
} catch (error) {
47+
if (isErrorUnknownRoute(error)) return null;
48+
throw error;
49+
}
4550
}
4651

4752
/**

kiloclaw/src/routes/platform.ts

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1097,10 +1097,17 @@ platform.post('/kilo-cli-run/start', async c => {
10971097
stub => stub.startKiloCliRun(result.data.prompt),
10981098
'startKiloCliRun'
10991099
);
1100+
if (!response) {
1101+
return jsonError(
1102+
'Kilo CLI agent not available (controller too old)',
1103+
404,
1104+
'controller_route_unavailable'
1105+
);
1106+
}
11001107
return c.json(response, 200);
11011108
} catch (err) {
1102-
const { message, status } = sanitizeError(err, 'kilo-cli-run start');
1103-
return jsonError(message, status);
1109+
const { message, status, code } = sanitizeOpenclawConfigError(err, 'kilo-cli-run start');
1110+
return jsonError(message, status, code);
11041111
}
11051112
});
11061113

src/app/(app)/claw/components/InstanceControls.tsx

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -400,7 +400,11 @@ export function InstanceControls({
400400
onOpenChange={setDoctorOpen}
401401
mutation={mutations.runDoctor}
402402
/>
403-
<StartKiloCliRunDialog open={kiloRunOpen} onOpenChange={setKiloRunOpen} />
403+
<StartKiloCliRunDialog
404+
open={kiloRunOpen}
405+
onOpenChange={setKiloRunOpen}
406+
machineStatus={status.status}
407+
/>
404408
</div>
405409
);
406410
}

src/app/(app)/claw/components/StartKiloCliRunDialog.tsx

Lines changed: 142 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
'use client';
22

3-
import { useState } from 'react';
3+
import { useEffect, useState } from 'react';
44
import { useRouter } from 'next/navigation';
5-
import { Loader2, Terminal } from 'lucide-react';
5+
import { AlertTriangle, Loader2, RotateCw, Terminal } from 'lucide-react';
6+
import { toast } from 'sonner';
67
import {
78
Dialog,
89
DialogContent,
@@ -14,18 +15,47 @@ import {
1415
import { Button } from '@/components/ui/button';
1516
import { Textarea } from '@/components/ui/textarea';
1617
import { useKiloClawMutations } from '@/hooks/useKiloClaw';
18+
import type { PlatformStatusResponse } from '@/lib/kiloclaw/types';
19+
import { AnimatedDots } from './AnimatedDots';
20+
21+
function isNeedsRedeployError(error: unknown): boolean {
22+
return (
23+
typeof error === 'object' &&
24+
error !== null &&
25+
'data' in error &&
26+
typeof (error as { data?: unknown }).data === 'object' &&
27+
(error as { data?: { upstreamCode?: unknown } }).data !== null &&
28+
(error as { data: { upstreamCode?: unknown } }).data.upstreamCode ===
29+
'controller_route_unavailable'
30+
);
31+
}
1732

1833
export function StartKiloCliRunDialog({
1934
open,
2035
onOpenChange,
36+
machineStatus,
2137
}: {
2238
open: boolean;
2339
onOpenChange: (open: boolean) => void;
40+
machineStatus: PlatformStatusResponse['status'];
2441
}) {
2542
const router = useRouter();
2643
const [prompt, setPrompt] = useState('');
2744
const mutations = useKiloClawMutations();
2845
const startMutation = mutations.startKiloCliRun;
46+
const redeployMutation = mutations.restartMachine;
47+
48+
const needsRedeploy = startMutation.isError && isNeedsRedeployError(startMutation.error);
49+
const machineReady = machineStatus === 'running';
50+
51+
// Clear stale "needs redeploy" error when the machine status changes away
52+
// from running (e.g. restarting after a redeploy was dispatched). This
53+
// ensures reopening the dialog shows the prompt form, not the old error.
54+
useEffect(() => {
55+
if (needsRedeploy && machineStatus !== 'running') {
56+
startMutation.reset();
57+
}
58+
}, [needsRedeploy, machineStatus, startMutation]);
2959

3060
const handleStart = () => {
3161
const trimmed = prompt.trim();
@@ -41,7 +71,20 @@ export function StartKiloCliRunDialog({
4171
);
4272
};
4373

74+
const handleRedeploy = () => {
75+
redeployMutation.mutate(
76+
{ imageTag: 'latest' },
77+
{
78+
onSuccess: () => {
79+
startMutation.reset();
80+
},
81+
onError: err => toast.error(err.message, { duration: 10000 }),
82+
}
83+
);
84+
};
85+
4486
const handleOpenChange = (nextOpen: boolean) => {
87+
if (!nextOpen && redeployMutation.isPending) return;
4588
if (!nextOpen) {
4689
setPrompt('');
4790
startMutation.reset();
@@ -58,55 +101,107 @@ export function StartKiloCliRunDialog({
58101
Recover with Kilo CLI Agent
59102
</DialogTitle>
60103
<DialogDescription>
61-
If your KiloClaw instance is stuck or failing, the Kilo CLI agent can help diagnose and
62-
fix the problem. Describe the issue below and the agent will work autonomously to
63-
resolve it.
104+
{needsRedeploy
105+
? 'Your instance needs to be redeployed before the recovery agent can run.'
106+
: !machineReady
107+
? 'Waiting for your instance to come back online before the recovery agent can run.'
108+
: 'If your KiloClaw instance is stuck or failing, the Kilo CLI agent can help diagnose and fix the problem. Describe the issue below and the agent will work autonomously to resolve it.'}
64109
</DialogDescription>
65110
</DialogHeader>
66111

67-
<div className="space-y-2">
68-
<Textarea
69-
placeholder="Describe the problem you're trying to solve (e.g. &quot;I can't connect to the gateway&quot; or &quot;The bot's cron jobs aren't checking in&quot;)"
70-
value={prompt}
71-
onChange={e => setPrompt(e.target.value)}
72-
className="min-h-30 resize-none"
73-
maxLength={10_000}
74-
autoFocus
75-
onKeyDown={e => {
76-
if (e.key === 'Enter' && (e.metaKey || e.ctrlKey)) {
77-
e.preventDefault();
78-
handleStart();
79-
}
80-
}}
81-
/>
82-
<p className="text-muted-foreground text-xs">
83-
Press Cmd+Enter to start. The agent will attempt to fix the issue using{' '}
84-
<code className="text-[11px]">kilo run --auto</code>.
85-
</p>
86-
</div>
87-
88-
<DialogFooter>
89-
<Button variant="outline" onClick={() => handleOpenChange(false)}>
90-
Cancel
91-
</Button>
92-
<Button
93-
onClick={handleStart}
94-
disabled={!prompt.trim() || startMutation.isPending}
95-
className="bg-emerald-600 text-white hover:bg-emerald-700"
96-
>
97-
{startMutation.isPending ? (
98-
<>
99-
<Loader2 className="h-4 w-4 animate-spin" />
100-
Starting...
101-
</>
102-
) : (
103-
<>
104-
<Terminal className="h-4 w-4" />
105-
Run Recovery
106-
</>
112+
{needsRedeploy ? (
113+
<>
114+
<div className="flex items-start gap-3 rounded-md border border-amber-500/30 bg-amber-500/10 p-3">
115+
<AlertTriangle className="mt-0.5 h-5 w-5 shrink-0 text-amber-400" />
116+
<p className="text-sm text-amber-200">
117+
Your KiloClaw instance is running an older version that doesn&apos;t support the
118+
recovery agent. Upgrade to the latest version to use this feature.
119+
</p>
120+
</div>
121+
<DialogFooter>
122+
<Button
123+
variant="outline"
124+
onClick={() => handleOpenChange(false)}
125+
disabled={redeployMutation.isPending}
126+
>
127+
Cancel
128+
</Button>
129+
<Button
130+
className="border-amber-500/30 bg-amber-500/10 text-amber-400 hover:bg-amber-500/20 hover:text-amber-300"
131+
onClick={handleRedeploy}
132+
disabled={redeployMutation.isPending}
133+
>
134+
{redeployMutation.isPending ? (
135+
<>
136+
Upgrading
137+
<AnimatedDots />
138+
</>
139+
) : (
140+
<>
141+
<RotateCw className="h-4 w-4" />
142+
Upgrade &amp; Redeploy
143+
</>
144+
)}
145+
</Button>
146+
</DialogFooter>
147+
</>
148+
) : (
149+
<>
150+
{!machineReady && (
151+
<div className="flex items-start gap-3 rounded-md border border-blue-500/30 bg-blue-500/10 p-3">
152+
<Loader2 className="mt-0.5 h-5 w-5 shrink-0 animate-spin text-blue-400" />
153+
<p className="text-sm text-blue-200">
154+
Your instance is restarting. The recovery agent will be available once it&apos;s
155+
back online.
156+
</p>
157+
</div>
107158
)}
108-
</Button>
109-
</DialogFooter>
159+
<div className="space-y-2">
160+
<Textarea
161+
placeholder="Describe the problem you're trying to solve (e.g. &quot;I can't connect to the gateway&quot; or &quot;The bot's cron jobs aren't checking in&quot;)"
162+
value={prompt}
163+
onChange={e => setPrompt(e.target.value)}
164+
className="min-h-30 resize-none"
165+
maxLength={10_000}
166+
autoFocus
167+
disabled={!machineReady}
168+
onKeyDown={e => {
169+
if (e.key === 'Enter' && (e.metaKey || e.ctrlKey)) {
170+
e.preventDefault();
171+
handleStart();
172+
}
173+
}}
174+
/>
175+
<p className="text-muted-foreground text-xs">
176+
Press Cmd+Enter to start. The agent will attempt to fix the issue using{' '}
177+
<code className="text-[11px]">kilo run --auto</code>.
178+
</p>
179+
</div>
180+
181+
<DialogFooter>
182+
<Button variant="outline" onClick={() => handleOpenChange(false)}>
183+
Cancel
184+
</Button>
185+
<Button
186+
onClick={handleStart}
187+
disabled={!machineReady || !prompt.trim() || startMutation.isPending}
188+
className="bg-emerald-600 text-white hover:bg-emerald-700"
189+
>
190+
{startMutation.isPending ? (
191+
<>
192+
<Loader2 className="h-4 w-4 animate-spin" />
193+
Starting...
194+
</>
195+
) : (
196+
<>
197+
<Terminal className="h-4 w-4" />
198+
Run Recovery
199+
</>
200+
)}
201+
</Button>
202+
</DialogFooter>
203+
</>
204+
)}
110205
</DialogContent>
111206
</Dialog>
112207
);

src/routers/kiloclaw-router.ts

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1138,11 +1138,27 @@ export const kiloclawRouter = createTRPCRouter({
11381138
.mutation(async ({ ctx, input }) => {
11391139
const instance = await getActiveInstance(ctx.user.id);
11401140
const client = new KiloClawInternalClient();
1141-
const result = await client.startKiloCliRun(
1142-
ctx.user.id,
1143-
input.prompt,
1144-
workerInstanceId(instance)
1145-
);
1141+
1142+
let result;
1143+
try {
1144+
result = await client.startKiloCliRun(
1145+
ctx.user.id,
1146+
input.prompt,
1147+
workerInstanceId(instance)
1148+
);
1149+
} catch (err) {
1150+
if (err instanceof KiloClawApiError) {
1151+
const { code } = getKiloClawApiErrorPayload(err);
1152+
if (code === 'controller_route_unavailable') {
1153+
throw new TRPCError({
1154+
code: 'PRECONDITION_FAILED',
1155+
message: 'Instance needs redeploy to support recovery',
1156+
cause: new UpstreamApiError('controller_route_unavailable'),
1157+
});
1158+
}
1159+
}
1160+
throw err;
1161+
}
11461162

11471163
// Persist the run in the database and return its ID
11481164
const [row] = await db

0 commit comments

Comments
 (0)