Skip to content

Commit ea718de

Browse files
committed
feat: add persistent retry queue for failed telemetry events (#4940)
- Implement TelemetryQueue class with VSCode globalState persistence
- Add exponential backoff retry logic (1s to 5min)
- Queue failed events from PostHog and Cloud telemetry clients
- Implement queue size limits (1000 events max, ~1MB disk usage)
- Add graceful shutdown with 5-second timeout
- Ensure queue persists across extension restarts
- Add comprehensive test coverage (99 tests passing)
1 parent 181993f commit ea718de

File tree

11 files changed

+1794
-37
lines changed

11 files changed

+1794
-37
lines changed

packages/cloud/src/TelemetryClient.ts

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -58,29 +58,46 @@ export class TelemetryClient extends BaseTelemetryClient {
5858
return
5959
}
6060

61-
const payload = {
62-
type: event.event,
63-
properties: await this.getEventProperties(event),
64-
}
61+
// Try to send directly first
62+
const success = await this.captureWithRetry(event)
6563

66-
if (this.debug) {
67-
console.info(`[TelemetryClient#capture] ${JSON.stringify(payload)}`)
64+
// If failed and queue is available, add to queue
65+
if (!success && this.queue) {
66+
await this.queue.addEvent(event, "cloud")
6867
}
68+
}
6969

70-
const result = rooCodeTelemetryEventSchema.safeParse(payload)
70+
/**
 * Builds the payload for a telemetry event, validates it against
 * `rooCodeTelemetryEventSchema`, and POSTs it to the `events` endpoint.
 *
 * @param event The telemetry event to capture
 * @returns True when no retry is needed: the event was sent, or it failed
 * schema validation and was dropped. False when building or sending the
 * event threw, so the caller should queue it for a later attempt.
 */
protected override async captureWithRetry(event: TelemetryEvent): Promise<boolean> {
	try {
		const payload = {
			type: event.event,
			properties: await this.getEventProperties(event),
		}

		if (this.debug) {
			console.info(`[TelemetryClient#captureWithRetry] ${JSON.stringify(payload)}`)
		}

		const result = rooCodeTelemetryEventSchema.safeParse(payload)

		if (!result.success) {
			console.error(
				`[TelemetryClient#captureWithRetry] Invalid telemetry event: ${result.error.message} - ${JSON.stringify(payload)}`,
			)
			// Invalid events are not retried, so report them as handled.
			return true
		}

		await this.fetch(`events`, { method: "POST", body: JSON.stringify(result.data) })
		return true
	} catch (error) {
		// Keep the failure visible instead of swallowing it; the caller decides
		// whether to queue the event based on the `false` return value.
		const reason = error instanceof Error ? error.message : String(error)
		console.error(`[TelemetryClient#captureWithRetry] Error sending telemetry event: ${reason}`)
		return false
	}
}
86103

packages/cloud/src/__tests__/TelemetryClient.test.ts

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -735,4 +735,217 @@ describe("TelemetryClient", () => {
735735
expect(fileContent).toBe("[]")
736736
})
737737
})
738+
739+
describe("captureWithRetry", () => {
	// Signature of the protected method under test, as seen through getPrivateProperty.
	type CaptureWithRetry = (event: {
		event: TelemetryEventName
		properties?: Record<string, any>
	}) => Promise<boolean>

	// Provider properties shared by the tests that attach a provider.
	const providerProperties = {
		appName: "roo-code",
		appVersion: "1.0.0",
		vscodeVersion: "1.60.0",
		platform: "darwin",
		editorName: "vscode",
		language: "en",
		mode: "code",
	}

	// Builds a fresh mock provider per test. Callers keep the returned object in a
	// local so it stays strongly referenced while the client uses it.
	const createProvider = (): TelemetryPropertiesProvider => ({
		getTelemetryProperties: vi.fn().mockResolvedValue({ ...providerProperties }),
	})

	// Looks up the protected captureWithRetry method and binds it to the client.
	const getCaptureWithRetry = (client: TelemetryClient): CaptureWithRetry =>
		getPrivateProperty<CaptureWithRetry>(client, "captureWithRetry").bind(client)

	it("should return true when event is captured successfully", async () => {
		const client = new TelemetryClient(mockAuthService, mockSettingsService)

		const mockProvider = createProvider()
		client.setProvider(mockProvider)

		const result = await getCaptureWithRetry(client)({
			event: TelemetryEventName.TASK_CREATED,
			properties: { taskId: "test-task-id" },
		})

		expect(result).toBe(true)
		expect(mockFetch).toHaveBeenCalled()
	})

	it("should return true for invalid events (don't retry invalid events)", async () => {
		const client = new TelemetryClient(mockAuthService, mockSettingsService)

		const result = await getCaptureWithRetry(client)({
			event: TelemetryEventName.TASK_CREATED,
			properties: { test: "value" }, // Invalid properties
		})

		expect(result).toBe(true) // Don't retry invalid events
		expect(mockFetch).not.toHaveBeenCalled()
		expect(console.error).toHaveBeenCalledWith(expect.stringContaining("Invalid telemetry event"))
	})

	it("should return false when fetch fails", async () => {
		const client = new TelemetryClient(mockAuthService, mockSettingsService)

		mockFetch.mockRejectedValue(new Error("Network error"))

		const mockProvider = createProvider()
		client.setProvider(mockProvider)

		const result = await getCaptureWithRetry(client)({
			event: TelemetryEventName.TASK_CREATED,
			properties: { taskId: "test-task-id" },
		})

		expect(result).toBe(false)
	})
})
822+
823+
describe("queue integration", () => {
	// Provider properties shared by the tests that attach a provider.
	const providerProperties = {
		appName: "roo-code",
		appVersion: "1.0.0",
		vscodeVersion: "1.60.0",
		platform: "darwin",
		editorName: "vscode",
		language: "en",
		mode: "code",
	}

	// Event sent by the tests that expect the payload to pass validation.
	const validEvent = {
		event: TelemetryEventName.TASK_CREATED,
		properties: { taskId: "test-task-id" },
	}

	// Builds a fresh mock provider per test. Callers keep the returned object in a
	// local so it stays strongly referenced while the client uses it.
	const createProvider = (): TelemetryPropertiesProvider => ({
		getTelemetryProperties: vi.fn().mockResolvedValue({ ...providerProperties }),
	})

	// Minimal stand-in for the queue: only addEvent is exercised by capture().
	const createMockQueue = () =>
		({
			addEvent: vi.fn(),
		}) as any

	it("should add event to queue when captureWithRetry fails", async () => {
		const client = new TelemetryClient(mockAuthService, mockSettingsService)

		const mockQueue = createMockQueue()
		client.setQueue(mockQueue)

		// Make captureWithRetry fail
		mockFetch.mockRejectedValue(new Error("Network error"))

		const mockProvider = createProvider()
		client.setProvider(mockProvider)

		await client.capture({ ...validEvent })

		expect(mockQueue.addEvent).toHaveBeenCalledWith(
			{
				event: TelemetryEventName.TASK_CREATED,
				properties: { taskId: "test-task-id" },
			},
			"cloud",
		)
	})

	it("should not add event to queue when captureWithRetry succeeds", async () => {
		const client = new TelemetryClient(mockAuthService, mockSettingsService)

		const mockQueue = createMockQueue()
		client.setQueue(mockQueue)

		const mockProvider = createProvider()
		client.setProvider(mockProvider)

		await client.capture({ ...validEvent })

		expect(mockQueue.addEvent).not.toHaveBeenCalled()
	})

	it("should not fail when queue is not set", async () => {
		const client = new TelemetryClient(mockAuthService, mockSettingsService)

		// Make captureWithRetry fail
		mockFetch.mockRejectedValue(new Error("Network error"))

		const mockProvider = createProvider()
		client.setProvider(mockProvider)

		// Should not throw even without queue
		await expect(client.capture({ ...validEvent })).resolves.toBeUndefined()
	})

	it("should not add invalid events to queue", async () => {
		const client = new TelemetryClient(mockAuthService, mockSettingsService)

		const mockQueue = createMockQueue()
		client.setQueue(mockQueue)

		await client.capture({
			event: TelemetryEventName.TASK_CREATED,
			properties: { test: "value" }, // Invalid properties
		})

		// Should not add invalid events to queue
		expect(mockQueue.addEvent).not.toHaveBeenCalled()
	})
})
738951
})

packages/telemetry/src/BaseTelemetryClient.ts

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,12 @@ import {
55
TelemetryPropertiesProvider,
66
TelemetryEventSubscription,
77
} from "@roo-code/types"
8+
import { TelemetryQueue } from "./TelemetryQueue"
89

910
export abstract class BaseTelemetryClient implements TelemetryClient {
1011
protected providerRef: WeakRef<TelemetryPropertiesProvider> | null = null
1112
protected telemetryEnabled: boolean = false
13+
protected queue?: TelemetryQueue
1214

1315
constructor(
1416
public readonly subscription?: TelemetryEventSubscription,
@@ -59,6 +61,29 @@ export abstract class BaseTelemetryClient implements TelemetryClient {
5961

6062
public abstract capture(event: TelemetryEvent): Promise<void>
6163

64+
/**
65+
* Attempts to capture an event with retry capability
66+
* @param event The telemetry event to capture
67+
* @returns True if no retry is needed (the event was sent, or was dropped as non-retryable), false if it should be queued for retry
68+
*/
69+
protected abstract captureWithRetry(event: TelemetryEvent): Promise<boolean>
70+
71+
/**
72+
* Returns the retry queue currently held by this client, if any
73+
* @returns The TelemetryQueue stored in `queue`, or undefined when none has been set
74+
*/
75+
protected getQueue(): TelemetryQueue | undefined {
76+
return this.queue
77+
}
78+
79+
/**
80+
* Stores the queue instance on this client, replacing any previously set queue
81+
* @param queue The TelemetryQueue instance to store in `queue`
82+
*/
83+
public setQueue(queue: TelemetryQueue): void {
84+
this.queue = queue
85+
}
86+
6287
public setProvider(provider: TelemetryPropertiesProvider): void {
6388
this.providerRef = new WeakRef(provider)
6489
}

packages/telemetry/src/PostHogTelemetryClient.ts

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -54,11 +54,37 @@ export class PostHogTelemetryClient extends BaseTelemetryClient {
5454
console.info(`[PostHogTelemetryClient#capture] ${event.event}`)
5555
}
5656

57-
this.client.capture({
58-
distinctId: this.distinctId,
59-
event: event.event,
60-
properties: await this.getEventProperties(event),
61-
})
57+
// Try to send directly first
58+
const success = await this.captureWithRetry(event)
59+
60+
// If failed and queue is available, add to queue
61+
if (!success && this.queue) {
62+
await this.queue.addEvent(event, "posthog")
63+
}
64+
}
65+
66+
/**
 * Gathers the event properties and hands the event to the PostHog client.
 *
 * `client.capture` is invoked without awaiting any result, so a `true`
 * return means only that the event was handed to PostHog, not that it was
 * delivered. The only failures visible here are a rejection from
 * `getEventProperties` or a synchronous throw from `client.capture`.
 *
 * @param event The telemetry event to capture
 * @returns True once the event has been handed to the PostHog client, false
 * if preparing or handing over the event threw and it should be retried
 */
protected override async captureWithRetry(event: TelemetryEvent): Promise<boolean> {
	try {
		const properties = await this.getEventProperties(event)

		this.client.capture({
			distinctId: this.distinctId,
			event: event.event,
			properties,
		})

		return true
	} catch (error) {
		// Log the failure instead of swallowing it; returning false lets the
		// caller queue the event for retry.
		const reason = error instanceof Error ? error.message : String(error)
		console.error(`[PostHogTelemetryClient#captureWithRetry] Error capturing telemetry event: ${reason}`)
		return false
	}
}
6389

6490
/**

0 commit comments

Comments
 (0)