Skip to content

Commit 074eb08

Browse files
committed
fix: handle errors thrown during rating
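Currently, if a rating throws an error, it interrupts the entire eval; only LLM-based ratings were guarded by a try/catch. These changes widen the try/catch to cover per-build and per-file ratings as well, so any error thrown during rating is caught and the rating is marked as skipped instead.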
1 parent 74bfb91 commit 074eb08

File tree

2 files changed

+21
-21
lines changed

2 files changed

+21
-21
lines changed

runner/ratings/built-in-ratings/valid-css-rating.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ export const validCssRating: PerFileRating = {
1515
kind: RatingKind.PER_FILE,
1616
id: 'common-valid-css',
1717
filter: PerFileRatingContentType.CSS,
18-
rate: async (code, _filePath) => {
18+
rate: async (code) => {
1919
const linterResult = await stylelint.lint({
2020
code: code,
2121
cwd: import.meta.dirname,

runner/ratings/rate-code.ts

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -84,19 +84,19 @@ export async function rateGeneratedCode(
8484
for (const current of currentPromptDef.ratings) {
8585
let result: IndividualAssessment | SkippedIndividualAssessment;
8686

87-
if (current.kind === RatingKind.PER_BUILD) {
88-
result = runPerBuildRating(
89-
current,
90-
buildResult,
91-
repairAttempts,
92-
outputFiles.length,
93-
axeRepairAttempts
94-
);
95-
} else if (current.kind === RatingKind.PER_FILE) {
96-
categorizedFiles ??= splitFilesIntoCategories(outputFiles);
97-
result = await runPerFileRating(current, categorizedFiles);
98-
} else if (current.kind === RatingKind.LLM_BASED) {
99-
try {
87+
try {
88+
if (current.kind === RatingKind.PER_BUILD) {
89+
result = runPerBuildRating(
90+
current,
91+
buildResult,
92+
repairAttempts,
93+
outputFiles.length,
94+
axeRepairAttempts
95+
);
96+
} else if (current.kind === RatingKind.PER_FILE) {
97+
categorizedFiles ??= splitFilesIntoCategories(outputFiles);
98+
result = await runPerFileRating(current, categorizedFiles);
99+
} else if (current.kind === RatingKind.LLM_BASED) {
100100
result = await runLlmBasedRating(
101101
environment,
102102
current,
@@ -109,14 +109,14 @@ export async function rateGeneratedCode(
109109
axeRepairAttempts,
110110
abortSignal
111111
);
112-
} catch (error) {
113-
result = getSkippedAssessment(
114-
current,
115-
`Error during execution:\n${error}`
116-
);
112+
} else {
113+
throw new UserFacingError(`Unsupported rating type ${current}`);
117114
}
118-
} else {
119-
throw new UserFacingError(`Unsupported rating type ${current}`);
115+
} catch (error) {
116+
result = getSkippedAssessment(
117+
current,
118+
`Error during execution:\n${error}`
119+
);
120120
}
121121

122122
if (result.state === IndividualAssessmentState.EXECUTED && result.usage) {

0 commit comments

Comments
 (0)