Skip to content

Commit 1b5772e

Browse files
authored
Merge pull request #60 from mcintyre94/investigate-reconnect-on-disconnect
Fix reconnect loop dropping final messages when service stops mid-reconnect
2 parents ddc25ff + ae29b23 commit 1b5772e

File tree

3 files changed

+135
-6
lines changed

3 files changed

+135
-6
lines changed

Wisp/Services/SpritesAPIClient.swift

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,26 @@ final class SpritesAPIClient {
280280
return try await request(method: "GET", path: "/sprites/\(spriteName)/services/\(serviceName)")
281281
}
282282

283+
// ServiceLogsProvider conformance — bridges the default-argument version to the protocol signature.
284+
func streamServiceLogs(spriteName: String, serviceName: String) -> AsyncThrowingStream<ServiceLogEvent, Error> {
285+
streamServiceLogs(spriteName: spriteName, serviceName: serviceName, duration: "3600s")
286+
}
287+
}
288+
289+
// MARK: - ServiceLogsProvider
290+
291+
/// Minimal protocol covering the two API calls used by the reconnect loop,
292+
/// allowing the loop to be tested without a live network connection.
293+
@MainActor
294+
protocol ServiceLogsProvider {
295+
func streamServiceLogs(spriteName: String, serviceName: String) -> AsyncThrowingStream<ServiceLogEvent, Error>
296+
func getServiceStatus(spriteName: String, serviceName: String) async throws -> ServiceInfo
297+
}
298+
299+
extension SpritesAPIClient: ServiceLogsProvider {}
300+
301+
extension SpritesAPIClient {
302+
283303
/// Delete a service (5s timeout to avoid blocking callers if sprite is unresponsive).
284304
func deleteService(spriteName: String, serviceName: String) async throws {
285305
let _: EmptyResponse = try await request(method: "DELETE", path: "/sprites/\(spriteName)/services/\(serviceName)", timeout: 5)

Wisp/ViewModels/ChatViewModel.swift

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -956,8 +956,11 @@ final class ChatViewModel {
956956
/// so existing content stays on screen with no flash. Only genuinely new events
957957
/// are appended. If the service is still running after a replay, polls and
958958
/// re-replays until the service stops or a result event arrives.
959-
private func reconnectToServiceLogs(
960-
apiClient: SpritesAPIClient,
959+
/// Core reconnect loop — fetches full log history on repeat until a result event
960+
/// arrives or the service is confirmed stopped. Separated from
961+
/// `reconnectToServiceLogs` so it can be tested against a mock API client.
962+
func runReconnectLoop(
963+
apiClient: some ServiceLogsProvider,
961964
modelContext: ModelContext
962965
) async {
963966
status = .reconnecting
@@ -986,6 +989,7 @@ final class ChatViewModel {
986989
// Replay loop — each iteration fetches full log history.
987990
// processServiceStream skips events whose UUID is already in
988991
// processedEventUUIDs, so content is never cleared mid-stream.
992+
var retriedAfterServiceStopped = false
989993
while !Task.isCancelled {
990994
receivedSystemEvent = false
991995
receivedResultEvent = false
@@ -1017,15 +1021,27 @@ final class ChatViewModel {
10171021
// If we got a result event, Claude is done
10181022
if receivedResultEvent { break }
10191023

1020-
// Check if service is still running before retrying
1021-
if let serviceInfo = try? await apiClient.getServiceStatus(spriteName: spriteName, serviceName: serviceName),
1022-
serviceInfo.state.status == "running" {
1024+
// Check if service is still running
1025+
let isRunning = (try? await apiClient.getServiceStatus(spriteName: spriteName, serviceName: serviceName))?.state.status == "running"
1026+
1027+
if isRunning {
10231028
logger.info("[Chat] Service still running, will re-poll after delay")
10241029
try? await Task.sleep(for: .seconds(2))
10251030
continue
10261031
}
10271032

1028-
// Service not running or status check failed — we're done
1033+
// Service has stopped (or status check failed / service gone). The GET stream
1034+
// may have been killed by iOS just as Claude finished writing its final events —
1035+
// a race between the connection dying and the result arriving. Allow one extra
1036+
// retry so we catch any events that landed in the log after the stream closed.
1037+
if !retriedAfterServiceStopped {
1038+
retriedAfterServiceStopped = true
1039+
logger.info("[Chat] Service stopped without result event — retrying once for final events")
1040+
try? await Task.sleep(for: .seconds(1))
1041+
continue
1042+
}
1043+
1044+
// Already retried after stop — give up
10291045
break
10301046
}
10311047

@@ -1039,6 +1055,13 @@ final class ChatViewModel {
10391055
status = .idle
10401056
}
10411057
persistMessages(modelContext: modelContext)
1058+
}
1059+
1060+
private func reconnectToServiceLogs(
1061+
apiClient: SpritesAPIClient,
1062+
modelContext: ModelContext
1063+
) async {
1064+
await runReconnectLoop(apiClient: apiClient, modelContext: modelContext)
10421065

10431066
if let queued = queuedPrompt, !Task.isCancelled {
10441067
let prompt = buildPrompt(text: queued, attachments: queuedAttachments)

WispTests/ChatViewModelTests.swift

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,33 @@ struct ChatViewModelTests {
2626
return (vm, chat)
2727
}
2828

29+
// MARK: - Mock API client
30+
31+
private final class MockServiceLogsProvider: ServiceLogsProvider {
32+
var streams: [AsyncThrowingStream<ServiceLogEvent, Error>]
33+
var statuses: [String]
34+
private(set) var streamCallCount = 0
35+
private(set) var statusCallCount = 0
36+
37+
init(streams: [AsyncThrowingStream<ServiceLogEvent, Error>], statuses: [String]) {
38+
self.streams = streams
39+
self.statuses = statuses
40+
}
41+
42+
func streamServiceLogs(spriteName: String, serviceName: String) -> AsyncThrowingStream<ServiceLogEvent, Error> {
43+
let idx = streamCallCount
44+
streamCallCount += 1
45+
return idx < streams.count ? streams[idx] : AsyncThrowingStream { $0.finish() }
46+
}
47+
48+
func getServiceStatus(spriteName: String, serviceName: String) async throws -> ServiceInfo {
49+
let idx = statusCallCount
50+
statusCallCount += 1
51+
let status = idx < statuses.count ? statuses[idx] : "stopped"
52+
return ServiceInfo(name: serviceName, state: ServiceInfo.ServiceState(status: status))
53+
}
54+
}
55+
2956
// MARK: - handleEvent: system
3057

3158
@Test func handleEvent_systemSetsModelName() throws {
@@ -723,6 +750,65 @@ struct ChatViewModelTests {
723750
#expect(vm.inputText == "")
724751
}
725752

753+
// MARK: - reconnectToServiceLogs: retriedAfterServiceStopped
754+
755+
@Test func reconnectToServiceLogs_retriesOnceWhenServiceStoppedWithNoResult_thenDeliversResult() async throws {
756+
let ctx = try makeModelContext()
757+
let (vm, _) = makeChatViewModel(modelContext: ctx)
758+
759+
// First stream: delivers a system event but no result — simulates the
760+
// stream dying just before Claude finishes.
761+
let systemLine = #"{"type":"system","session_id":"s1","model":"claude-sonnet-4-20250514"}"# + "\n"
762+
let stream1 = AsyncThrowingStream<ServiceLogEvent, Error> { continuation in
763+
continuation.yield(ServiceLogEvent(type: .stdout, data: systemLine, exitCode: nil, timestamp: nil, logFiles: nil))
764+
continuation.finish()
765+
}
766+
767+
// Second stream: delivers the result event that landed after the first stream closed.
768+
let resultLine = #"{"type":"result","session_id":"s1","subtype":"success"}"# + "\n"
769+
let stream2 = AsyncThrowingStream<ServiceLogEvent, Error> { continuation in
770+
continuation.yield(ServiceLogEvent(type: .stdout, data: systemLine, exitCode: nil, timestamp: nil, logFiles: nil))
771+
continuation.yield(ServiceLogEvent(type: .stdout, data: resultLine, exitCode: nil, timestamp: nil, logFiles: nil))
772+
continuation.finish()
773+
}
774+
775+
let mock = MockServiceLogsProvider(streams: [stream1, stream2], statuses: ["stopped"])
776+
777+
await vm.runReconnectLoop(apiClient: mock, modelContext: ctx)
778+
779+
#expect(mock.streamCallCount == 2, "Should replay logs twice: once on initial reconnect, once on retry")
780+
#expect(mock.statusCallCount == 1, "Should only check status once (before the retry)")
781+
guard case .idle = vm.status else {
782+
Issue.record("Expected idle status after reconnect completes, got \(vm.status)")
783+
return
784+
}
785+
}
786+
787+
@Test func reconnectToServiceLogs_givesUpAfterOneRetryWhenServiceStillStopped() async throws {
788+
let ctx = try makeModelContext()
789+
let (vm, _) = makeChatViewModel(modelContext: ctx)
790+
791+
// Both streams return no result event, and the service stays stopped.
792+
let systemLine = #"{"type":"system","session_id":"s1","model":"claude-sonnet-4-20250514"}"# + "\n"
793+
let makeStream = {
794+
AsyncThrowingStream<ServiceLogEvent, Error> { continuation in
795+
continuation.yield(ServiceLogEvent(type: .stdout, data: systemLine, exitCode: nil, timestamp: nil, logFiles: nil))
796+
continuation.finish()
797+
}
798+
}
799+
800+
let mock = MockServiceLogsProvider(streams: [makeStream(), makeStream()], statuses: ["stopped", "stopped"])
801+
802+
await vm.runReconnectLoop(apiClient: mock, modelContext: ctx)
803+
804+
#expect(mock.streamCallCount == 2, "Should attempt exactly two replays: initial + one retry")
805+
#expect(mock.statusCallCount == 2, "Should check status once per iteration that yields no result event")
806+
guard case .idle = vm.status else {
807+
Issue.record("Expected idle status after giving up, got \(vm.status)")
808+
return
809+
}
810+
}
811+
726812
@Test func stashDraft_leavesInputReadyForNextMessage() throws {
727813
let ctx = try makeModelContext()
728814
let (vm, _) = makeChatViewModel(modelContext: ctx)

0 commit comments

Comments
 (0)