@@ -152,60 +152,57 @@ export async function generateCodeAndAssess(options: {
152
152
153
153
for ( const rootPromptDef of promptsToProcess ) {
154
154
allTasks . push (
155
- appConcurrencyQueue . add (
156
- async ( ) => {
157
- const evalID = await env . gateway . initializeEval ( ) ;
158
- let results : AssessmentResult [ ] | undefined ;
159
-
160
- try {
161
- results = await callWithTimeout (
162
- `Evaluation of ${ rootPromptDef . name } ` ,
163
- async abortSignal =>
164
- startEvaluationTask (
165
- evalID ,
166
- env ,
167
- env . gateway ,
168
- ratingLlm ,
169
- options . model ,
170
- rootPromptDef ,
171
- options . localMode ,
172
- options . skipScreenshots ,
173
- options . outputDirectory ,
174
- options . ragEndpoint ,
175
- abortSignal ,
176
- options . skipAxeTesting ,
177
- ! ! options . enableUserJourneyTesting ,
178
- ! ! options . enableAutoCsp ,
179
- workerConcurrencyQueue ,
180
- progress ,
181
- options . autoraterModel || DEFAULT_AUTORATER_MODEL_NAME ,
182
- options . a11yRepairAttempts ?? 0 ,
183
- ) ,
184
- // 10min max per app evaluation. We just want to make sure it never gets stuck.
185
- 10 ,
186
- ) ;
187
- return results ;
188
- } catch ( e : unknown ) {
189
- failedPrompts . push ( {
190
- promptName : rootPromptDef . name ,
191
- error : `${ e } ` ,
192
- stack : e instanceof Error ? e . stack : undefined ,
193
- } ) ;
194
-
195
- let details = `Error: ${ e } ` ;
196
- if ( e instanceof Error && e . stack ) {
197
- details += `\nStack: ${ e . stack } ` ;
198
- }
199
-
200
- progress . log ( rootPromptDef , 'error' , 'Failed to evaluate code' , details ) ;
201
- return [ ] satisfies AssessmentResult [ ] ;
202
- } finally {
203
- progress . evalFinished ( rootPromptDef , results || [ ] ) ;
204
- await env . gateway . finalizeEval ( evalID ) ;
155
+ appConcurrencyQueue . add ( async ( ) => {
156
+ const evalID = await env . gateway . initializeEval ( ) ;
157
+ let results : AssessmentResult [ ] | undefined ;
158
+
159
+ try {
160
+ results = await callWithTimeout (
161
+ `Evaluation of ${ rootPromptDef . name } ` ,
162
+ async abortSignal =>
163
+ startEvaluationTask (
164
+ evalID ,
165
+ env ,
166
+ env . gateway ,
167
+ ratingLlm ,
168
+ options . model ,
169
+ rootPromptDef ,
170
+ options . localMode ,
171
+ options . skipScreenshots ,
172
+ options . outputDirectory ,
173
+ options . ragEndpoint ,
174
+ abortSignal ,
175
+ options . skipAxeTesting ,
176
+ ! ! options . enableUserJourneyTesting ,
177
+ ! ! options . enableAutoCsp ,
178
+ workerConcurrencyQueue ,
179
+ progress ,
180
+ options . autoraterModel || DEFAULT_AUTORATER_MODEL_NAME ,
181
+ options . a11yRepairAttempts ?? 0 ,
182
+ ) ,
183
+ // 10min max per app evaluation. We just want to make sure it never gets stuck.
184
+ 10 ,
185
+ ) ;
186
+ return results ;
187
+ } catch ( e : unknown ) {
188
+ failedPrompts . push ( {
189
+ promptName : rootPromptDef . name ,
190
+ error : `${ e } ` ,
191
+ stack : e instanceof Error ? e . stack : undefined ,
192
+ } ) ;
193
+
194
+ let details = `Error: ${ e } ` ;
195
+ if ( e instanceof Error && e . stack ) {
196
+ details += `\nStack: ${ e . stack } ` ;
205
197
}
206
- } ,
207
- { throwOnTimeout : true } ,
208
- ) ,
198
+
199
+ progress . log ( rootPromptDef , 'error' , 'Failed to evaluate code' , details ) ;
200
+ return [ ] satisfies AssessmentResult [ ] ;
201
+ } finally {
202
+ progress . evalFinished ( rootPromptDef , results || [ ] ) ;
203
+ await env . gateway . finalizeEval ( evalID ) ;
204
+ }
205
+ } ) ,
209
206
) ;
210
207
}
211
208
0 commit comments