@@ -37,6 +37,10 @@ const PROVIDER_CONFIGS: Array<[string, string]> = [
3737 [ "anthropic" , "claude-sonnet-4-5" ] ,
3838] ;
3939
40+ // Use Anthropic by default for provider-agnostic tests (faster and cheaper)
41+ const DEFAULT_PROVIDER = "anthropic" ;
42+ const DEFAULT_MODEL = "claude-sonnet-4-5" ;
43+
4044// Integration test timeout guidelines:
4145// - Individual tests should complete within 10 seconds when possible
4246// - Use tight timeouts (5-10s) for event waiting to fail fast
@@ -55,8 +59,9 @@ describeIntegration("IpcMain sendMessage integration tests", () => {
5559 const { loadTokenizerModules } = await import ( "../../src/utils/main/tokenizer" ) ;
5660 await loadTokenizerModules ( ) ;
5761 } , 30000 ) ; // 30s timeout for tokenizer loading
58- // Run tests for each provider concurrently
59- describe . each ( PROVIDER_CONFIGS ) ( "%s:%s provider tests" , ( provider , model ) => {
62+
63+ // Smoke test - verify each provider works
64+ describe . each ( PROVIDER_CONFIGS ) ( "%s:%s smoke test" , ( provider , model ) => {
6065 test . concurrent (
6166 "should successfully send message and receive response" ,
6267 async ( ) => {
@@ -91,6 +96,12 @@ describeIntegration("IpcMain sendMessage integration tests", () => {
9196 } ,
9297 15000
9398 ) ;
99+ } ) ;
100+
101+ // Core functionality tests - using single provider (these test IPC/streaming, not provider-specific behavior)
102+ describe ( "core functionality" , ( ) => {
103+ const provider = DEFAULT_PROVIDER ;
104+ const model = DEFAULT_MODEL ;
94105
95106 test . concurrent (
96107 "should interrupt streaming with interruptStream()" ,
@@ -269,11 +280,6 @@ describeIntegration("IpcMain sendMessage integration tests", () => {
269280 test . concurrent (
270281 "should handle reconnection during active stream" ,
271282 async ( ) => {
272- // Only test with Anthropic (faster and more reliable for this test)
273- if ( provider === "openai" ) {
274- return ;
275- }
276-
277283 const { env, workspaceId, cleanup } = await setupWorkspace ( provider ) ;
278284 try {
279285 // Start a stream with tool call that takes a long time
@@ -554,11 +560,7 @@ describeIntegration("IpcMain sendMessage integration tests", () => {
554560 expect ( result . success ) . toBe ( true ) ;
555561
556562 // Wait for stream to complete
557- const collector = await waitForStreamSuccess (
558- env . sentEvents ,
559- workspaceId ,
560- provider === "openai" ? 30000 : 10000
561- ) ;
563+ const collector = await waitForStreamSuccess ( env . sentEvents , workspaceId , 10000 ) ;
562564
563565 // Get the final assistant message
564566 const finalMessage = collector . getFinalMessage ( ) ;
@@ -783,50 +785,6 @@ These are general instructions that apply to all modes.
783785 ) ;
784786 } ) ;
785787
786- // Provider parity tests - ensure both providers handle the same scenarios
787- describe ( "provider parity" , ( ) => {
788- test . concurrent (
789- "both providers should handle the same message" ,
790- async ( ) => {
791- const results : Record < string , { success : boolean ; responseLength : number } > = { } ;
792-
793- for ( const [ provider , model ] of PROVIDER_CONFIGS ) {
794- // Create fresh environment with provider setup
795- const { env, workspaceId, cleanup } = await setupWorkspace ( provider ) ;
796-
797- // Send same message to both providers
798- const result = await sendMessageWithModel (
799- env . mockIpcRenderer ,
800- workspaceId ,
801- "Say 'parity test' and nothing else" ,
802- provider ,
803- model
804- ) ;
805-
806- // Collect response
807- const collector = await waitForStreamSuccess ( env . sentEvents , workspaceId , 10000 ) ;
808-
809- results [ provider ] = {
810- success : result . success ,
811- responseLength : collector . getDeltas ( ) . length ,
812- } ;
813-
814- // Cleanup
815- await cleanup ( ) ;
816- }
817-
818- // Verify both providers succeeded
819- expect ( results . openai . success ) . toBe ( true ) ;
820- expect ( results . anthropic . success ) . toBe ( true ) ;
821-
822- // Verify both providers generated responses (non-zero deltas)
823- expect ( results . openai . responseLength ) . toBeGreaterThan ( 0 ) ;
824- expect ( results . anthropic . responseLength ) . toBeGreaterThan ( 0 ) ;
825- } ,
826- 30000
827- ) ;
828- } ) ;
829-
830788 // Error handling tests for API key issues
831789 describe ( "API key error handling" , ( ) => {
832790 test . each ( PROVIDER_CONFIGS ) (
@@ -904,43 +862,31 @@ These are general instructions that apply to all modes.
904862 ) ;
905863 } ) ;
906864
907- // Token limit error handling tests
865+ // Token limit error handling tests - using single provider to reduce test time (expensive test)
908866 describe ( "token limit error handling" , ( ) => {
909- test . each ( PROVIDER_CONFIGS ) (
910- "%s should return error when accumulated history exceeds token limit" ,
911- async ( provider , model ) => {
867+ test . concurrent (
868+ "should return error when accumulated history exceeds token limit" ,
869+ async ( ) => {
870+ const provider = DEFAULT_PROVIDER ;
871+ const model = DEFAULT_MODEL ;
912872 const { env, workspaceId, cleanup } = await setupWorkspace ( provider ) ;
913873 try {
914874 // Build up large conversation history to exceed context limits
915- // Different providers have different limits:
916- // - Anthropic: 200k tokens → need ~40 messages of 50k chars (2M chars total)
917- // - OpenAI: varies by model, use ~80 messages (4M chars total) to ensure we hit the limit
875+ // For Anthropic: 200k tokens → need ~15 messages of 50k chars (750k chars total) to exceed
876+ // Reduced from 40 to 15 messages to speed up test while still triggering the error
918877 await buildLargeHistory ( workspaceId , env . config , {
919878 messageSize : 50_000 ,
920- messageCount : provider === "anthropic" ? 40 : 80 ,
879+ messageCount : 15 ,
921880 } ) ;
922881
923882 // Now try to send a new message - should trigger token limit error
924883 // due to accumulated history
925- // Disable auto-truncation to force context error
926- const sendOptions =
927- provider === "openai"
928- ? {
929- providerOptions : {
930- openai : {
931- disableAutoTruncation : true ,
932- forceContextLimitError : true ,
933- } ,
934- } ,
935- }
936- : undefined ;
937884 const result = await sendMessageWithModel (
938885 env . mockIpcRenderer ,
939886 workspaceId ,
940887 "What is the weather?" ,
941888 provider ,
942- model ,
943- sendOptions
889+ model
944890 ) ;
945891
946892 // IPC call itself should succeed (errors come through stream events)
@@ -1029,16 +975,19 @@ These are general instructions that apply to all modes.
1029975 ) ;
1030976 } ) ;
1031977
1032- // Tool policy tests
978+ // Tool policy tests - using single provider (tool policy is implemented in our code, not provider-specific)
1033979 describe ( "tool policy" , ( ) => {
980+ const provider = DEFAULT_PROVIDER ;
981+ const model = DEFAULT_MODEL ;
982+
1034983 // Retry tool policy tests in CI (they depend on external API behavior)
1035984 if ( process . env . CI && typeof jest !== "undefined" && jest . retryTimes ) {
1036985 jest . retryTimes ( 2 , { logErrorsBeforeRetry : true } ) ;
1037986 }
1038987
1039- test . each ( PROVIDER_CONFIGS ) (
1040- "%s should respect tool policy that disables bash" ,
1041- async ( provider , model ) => {
988+ test . concurrent (
989+ "should respect tool policy that disables bash" ,
990+ async ( ) => {
1042991 const { env, workspaceId, workspacePath, cleanup } = await setupWorkspace ( provider ) ;
1043992 try {
1044993 // Create a test file in the workspace
@@ -1062,42 +1011,21 @@ These are general instructions that apply to all modes.
10621011 model ,
10631012 {
10641013 toolPolicy : [ { regex_match : "bash" , action : "disable" } ] ,
1065- ...( provider === "openai"
1066- ? { providerOptions : { openai : { simulateToolPolicyNoop : true } } }
1067- : { } ) ,
10681014 }
10691015 ) ;
10701016
10711017 // IPC call should succeed
10721018 expect ( result . success ) . toBe ( true ) ;
10731019
1074- // Wait for stream to complete (longer timeout for tool policy tests)
1020+ // Collect stream events for this workspace
10751021 const collector = createEventCollector ( env . sentEvents , workspaceId ) ;
10761022
1077- // Wait for either stream-end or stream-error
1078- // (helpers will log diagnostic info on failure)
1079- const streamTimeout = provider === "openai" ? 90000 : 30000 ;
1080- await Promise . race ( [
1081- collector . waitForEvent ( "stream-end" , streamTimeout ) ,
1082- collector . waitForEvent ( "stream-error" , streamTimeout ) ,
1083- ] ) ;
1023+ // Wait for stream to complete
1024+ await collector . waitForEvent ( "stream-end" , 30000 ) ;
10841025
1085- // This will throw with detailed error info if stream didn't complete successfully
1026+ // Verify stream completed successfully
10861027 assertStreamSuccess ( collector ) ;
10871028
1088- if ( provider === "openai" ) {
1089- const deltas = collector . getDeltas ( ) ;
1090- const noopDelta = deltas . find (
1091- ( event ) : event is StreamDeltaEvent =>
1092- "type" in event &&
1093- event . type === "stream-delta" &&
1094- typeof ( event as StreamDeltaEvent ) . delta === "string"
1095- ) ;
1096- expect ( noopDelta ?. delta ) . toContain (
1097- "Tool execution skipped because the requested tool is disabled by policy."
1098- ) ;
1099- }
1100-
11011029 // Verify file still exists (bash tool was disabled, so deletion shouldn't have happened)
11021030 const fileStillExists = await fs . access ( testFilePath ) . then (
11031031 ( ) => true ,
@@ -1112,12 +1040,12 @@ These are general instructions that apply to all modes.
11121040 await cleanup ( ) ;
11131041 }
11141042 } ,
1115- 90000
1043+ 30000
11161044 ) ;
11171045
1118- test . each ( PROVIDER_CONFIGS ) (
1119- "%s should respect tool policy that disables file_edit tools" ,
1120- async ( provider , model ) => {
1046+ test . concurrent (
1047+ "should respect tool policy that disables file_edit tools" ,
1048+ async ( ) => {
11211049 const { env, workspaceId, workspacePath, cleanup } = await setupWorkspace ( provider ) ;
11221050 try {
11231051 // Create a test file with known content
@@ -1138,58 +1066,43 @@ These are general instructions that apply to all modes.
11381066 { regex_match : "file_edit_.*" , action : "disable" } ,
11391067 { regex_match : "bash" , action : "disable" } ,
11401068 ] ,
1141- ...( provider === "openai"
1142- ? { providerOptions : { openai : { simulateToolPolicyNoop : true } } }
1143- : { } ) ,
11441069 }
11451070 ) ;
11461071
11471072 // IPC call should succeed
11481073 expect ( result . success ) . toBe ( true ) ;
11491074
1150- // Wait for stream to complete (longer timeout for tool policy tests)
1075+ // Wait for stream to complete
11511076 const collector = createEventCollector ( env . sentEvents , workspaceId ) ;
11521077
11531078 // Wait for either stream-end or stream-error
1154- // (helpers will log diagnostic info on failure)
1155- const streamTimeout = provider === "openai" ? 90000 : 30000 ;
11561079 await Promise . race ( [
1157- collector . waitForEvent ( "stream-end" , streamTimeout ) ,
1158- collector . waitForEvent ( "stream-error" , streamTimeout ) ,
1080+ collector . waitForEvent ( "stream-end" , 30000 ) ,
1081+ collector . waitForEvent ( "stream-error" , 30000 ) ,
11591082 ] ) ;
11601083
11611084 // This will throw with detailed error info if stream didn't complete successfully
11621085 assertStreamSuccess ( collector ) ;
11631086
1164- if ( provider === "openai" ) {
1165- const deltas = collector . getDeltas ( ) ;
1166- const noopDelta = deltas . find (
1167- ( event ) : event is StreamDeltaEvent =>
1168- "type" in event &&
1169- event . type === "stream-delta" &&
1170- typeof ( event as StreamDeltaEvent ) . delta === "string"
1171- ) ;
1172- expect ( noopDelta ?. delta ) . toContain (
1173- "Tool execution skipped because the requested tool is disabled by policy."
1174- ) ;
1175- }
1176-
11771087 // Verify file content unchanged (file_edit tools and bash were disabled)
11781088 const content = await fs . readFile ( testFilePath , "utf-8" ) ;
11791089 expect ( content ) . toBe ( originalContent ) ;
11801090 } finally {
11811091 await cleanup ( ) ;
11821092 }
11831093 } ,
1184- 90000
1094+ 30000
11851095 ) ;
11861096 } ) ;
11871097
1188- // Additional system instructions tests
1098+ // Additional system instructions tests - using single provider
11891099 describe ( "additional system instructions" , ( ) => {
1190- test . each ( PROVIDER_CONFIGS ) (
1191- "%s should pass additionalSystemInstructions through to system message" ,
1192- async ( provider , model ) => {
1100+ const provider = DEFAULT_PROVIDER ;
1101+ const model = DEFAULT_MODEL ;
1102+
1103+ test . concurrent (
1104+ "should pass additionalSystemInstructions through to system message" ,
1105+ async ( ) => {
11931106 const { env, workspaceId, cleanup } = await setupWorkspace ( provider ) ;
11941107 try {
11951108 // Send message with custom system instructions that add a distinctive marker
@@ -1229,7 +1142,8 @@ These are general instructions that apply to all modes.
12291142 // OpenAI auto truncation integration test
12301143 // This test verifies that the truncation: "auto" parameter works correctly
12311144 // by first forcing a context overflow error, then verifying recovery with auto-truncation
1232- describeIntegration ( "OpenAI auto truncation integration" , ( ) => {
1145+ // SKIPPED: Very expensive test (builds 80 large messages), covered by unit tests
1146+ describe . skip ( "OpenAI auto truncation integration" , ( ) => {
12331147 const provider = "openai" ;
12341148 const model = "gpt-4o-mini" ;
12351149
@@ -1461,8 +1375,11 @@ These are general instructions that apply to all modes.
14611375 ) ;
14621376} ) ;
14631377
1464- // Test image support across providers
1465- describe . each ( PROVIDER_CONFIGS ) ( "%s:%s image support" , ( provider , model ) => {
1378+ // Test image support - using single provider (image handling is SDK-level, not provider-specific)
1379+ describe ( "image support" , ( ) => {
1380+ const provider = DEFAULT_PROVIDER ;
1381+ const model = DEFAULT_MODEL ;
1382+
14661383 test . concurrent (
14671384 "should send images to AI model and get response" ,
14681385 async ( ) => {
0 commit comments