Skip to content

Commit 444ed2d

Browse files
authored
Use correct resultReason method when fetching eval reason (#2089)
We previously relied on a quick fix to get the reason; this commit replaces it with an improved implementation that uses the correct resultReason method.
1 parent 4d457e7 commit 444ed2d

File tree

4 files changed

+53
-47
lines changed

4 files changed

+53
-47
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ __testing__
9494
# New spans
9595
apps/web/ingest
9696
apps/workers/workspaces/*/traces
97+
provider-logs/*
98+
workspaces/*
9799

98100
# Helm chart secrets (do not commit)
99101
charts/latitude/values.secrets.yaml

packages/core/src/data-access/issues/getSpanMessagesAndEvaluationResultsByIssue.test.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,7 @@ describe('getSpanMessagesAndEvaluationResultsByIssue', () => {
221221
workspace,
222222
commit,
223223
issue,
224+
existingEvaluations: [evaluation],
224225
})
225226

226227
expect(result.ok).toBe(true)
@@ -256,6 +257,7 @@ describe('getSpanMessagesAndEvaluationResultsByIssue', () => {
256257
workspace,
257258
commit,
258259
issue,
260+
existingEvaluations: [evaluation],
259261
})
260262

261263
expect(result.ok).toBe(true)
@@ -283,6 +285,7 @@ describe('getSpanMessagesAndEvaluationResultsByIssue', () => {
283285
workspace,
284286
commit,
285287
issue,
288+
existingEvaluations: [evaluation],
286289
})
287290

288291
expect(result.ok).toBe(true)
@@ -307,6 +310,7 @@ describe('getSpanMessagesAndEvaluationResultsByIssue', () => {
307310
workspace,
308311
commit,
309312
issue,
313+
existingEvaluations: [evaluation],
310314
})
311315

312316
expect(result.ok).toBe(true)
@@ -337,6 +341,7 @@ describe('getSpanMessagesAndEvaluationResultsByIssue', () => {
337341
workspace,
338342
commit,
339343
issue,
344+
existingEvaluations: [evaluation],
340345
})
341346

342347
expect(result.ok).toBe(true)
@@ -366,6 +371,7 @@ describe('getSpanMessagesAndEvaluationResultsByIssue', () => {
366371
workspace,
367372
commit,
368373
issue,
374+
existingEvaluations: [evaluation],
369375
})
370376

371377
expect(result.ok).toBe(false)
@@ -379,6 +385,7 @@ describe('getSpanMessagesAndEvaluationResultsByIssue', () => {
379385
workspace,
380386
commit,
381387
issue,
388+
existingEvaluations: [evaluation],
382389
})
383390

384391
expect(result.ok).toBe(true)
@@ -426,6 +433,7 @@ describe('getSpanMessagesAndEvaluationResultsByIssue', () => {
426433
workspace,
427434
commit,
428435
issue,
436+
existingEvaluations: [evaluation],
429437
})
430438

431439
expect(result.ok).toBe(true)
@@ -535,6 +543,7 @@ describe('getSpanMessagesAndEvaluationResultsByIssue', () => {
535543
workspace,
536544
commit: commit2,
537545
issue,
546+
existingEvaluations: [evaluation],
538547
})
539548

540549
expect(result.ok).toBe(true)
@@ -642,6 +651,7 @@ describe('getSpanMessagesAndEvaluationResultsByIssue', () => {
642651
workspace,
643652
commit: draftCommit,
644653
issue,
654+
existingEvaluations: [evaluation],
645655
})
646656

647657
expect(result.ok).toBe(true)
@@ -733,6 +743,7 @@ describe('getSpanMessagesAndEvaluationResultsByIssue', () => {
733743
workspace,
734744
commit,
735745
issue,
746+
existingEvaluations: [evaluation],
736747
})
737748

738749
expect(result.ok).toBe(true)

packages/core/src/data-access/issues/getSpanMessagesAndEvaluationResultsByIssue.ts

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,18 @@ import {
55
EvaluationResultsV2Repository,
66
} from '../../repositories'
77
import { Result } from '../../lib/Result'
8-
import { EvaluationResultV2 } from '../../constants'
8+
import {
9+
EvaluationResultSuccessValue,
10+
EvaluationResultV2,
11+
EvaluationV2,
12+
} from '../../constants'
913
import { Issue } from '../../schema/models/types/Issue'
1014
import { Message as LegacyMessage } from '@latitude-data/constants/legacyCompiler'
1115
import { assembleTraceWithMessages } from '../../services/tracing/traces/assemble'
1216
import { adaptCompletionSpanMessagesToLegacy } from '../../services/tracing/spans/fetching/findCompletionSpanFromTrace'
1317
import { UnprocessableEntityError } from '../../lib/errors'
1418
import { getHITLSpansByIssue } from './getHITLSpansByIssue'
19+
import { getEvaluationMetricSpecification } from '../../services/evaluationsV2/specifications'
1520

1621
export type SpanMessagesWithEvalResultReason = {
1722
messages: LegacyMessage[]
@@ -29,10 +34,12 @@ export async function getSpanMessagesAndEvaluationResultsByIssue({
2934
workspace,
3035
commit,
3136
issue,
37+
existingEvaluations,
3238
}: {
3339
workspace: Workspace
3440
commit: Commit
3541
issue: Issue
42+
existingEvaluations: EvaluationV2[]
3643
}) {
3744
// Three is enough, as we don't want to overfit or add too many tokens to the prompt
3845
const spansResult = await getHITLSpansByIssue({
@@ -79,30 +86,37 @@ export async function getSpanMessagesAndEvaluationResultsByIssue({
7986
}
8087

8188
// There will always be exactly one evaluation result for a span and trace id
82-
const evaluationResults = evaluationResultsResult.filter(
89+
const evaluationResult = evaluationResultsResult.find(
8390
(result) =>
8491
result.evaluatedSpanId === span.id &&
8592
result.evaluatedTraceId === span.traceId,
86-
)[0]
93+
)
94+
95+
const evaluation = evaluationResult
96+
? existingEvaluations.find(
97+
(e) => e.uuid === evaluationResult.evaluationUuid,
98+
)
99+
: undefined
87100

88101
messagesAndEvaluationResults.push({
89102
messages: adaptCompletionSpanMessagesToLegacy(completionSpan),
90-
reason: getReasonFromEvaluationResult(evaluationResults),
103+
reason: getReasonFromEvaluationResult(evaluationResult, evaluation),
91104
})
92105
}
93106

94107
return Result.ok(messagesAndEvaluationResults)
95108
}
96109

97-
// We need an efficient way of extracting reasons directly from metadata without fetching evaluations
98-
function getReasonFromEvaluationResult(result: EvaluationResultV2) {
99-
if (result.error || !result.metadata) {
110+
function getReasonFromEvaluationResult(
111+
result: EvaluationResultV2 | undefined,
112+
evaluation: EvaluationV2 | undefined,
113+
): string {
114+
if (!result || !evaluation || result.error) {
100115
return ''
101116
}
102-
103-
// LLM, Rule, and Human evaluations all have a reason field (required for LLM, optional for Rule/Human)
104-
if ('reason' in result.metadata && result.metadata.reason) {
105-
return result.metadata.reason ?? ''
106-
}
107-
return ''
117+
const specification = getEvaluationMetricSpecification(evaluation)
118+
const resultReason = specification.resultReason as (
119+
result: EvaluationResultSuccessValue,
120+
) => string | undefined
121+
return resultReason(result) ?? ''
108122
}

packages/core/src/services/evaluationsV2/generateFromIssue/generateFromIssue.ts

Lines changed: 13 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -99,24 +99,28 @@ export async function generateEvaluationConfigFromIssueWithCopilot(
9999
const copilot = copilotResult.unwrap()
100100

101101
// Get the existing evaluation names for the same commit and document to avoid generating evals with the same name (unique key)
102-
const existingEvaluationNamesResult = await getExistingEvaluationNames({
103-
workspace: workspace,
104-
commit: commit,
105-
issue: issue,
106-
})
107-
108-
if (!Result.isOk(existingEvaluationNamesResult)) {
109-
return existingEvaluationNamesResult
102+
const evaluationsRepository = new EvaluationsV2Repository(workspace.id)
103+
const evaluationsFromSameCommitAndDocumentResult =
104+
await evaluationsRepository.listAtCommitByDocument({
105+
projectId: commit.projectId,
106+
commitUuid: commit.uuid,
107+
documentUuid: issue.documentUuid,
108+
})
109+
if (!Result.isOk(evaluationsFromSameCommitAndDocumentResult)) {
110+
return evaluationsFromSameCommitAndDocumentResult
110111
}
112+
const existingEvaluations =
113+
evaluationsFromSameCommitAndDocumentResult.unwrap()
111114

112-
const existingEvaluationNames = existingEvaluationNamesResult.unwrap()
115+
const existingEvaluationNames = existingEvaluations.map((e) => e.name)
113116

114117
// Getting failed examples (evaluation results with the issue attached) to feed the copilot
115118
const messagesAndReasonWhyFailedForIssueResult =
116119
await getSpanMessagesAndEvaluationResultsByIssue({
117120
workspace: workspace,
118121
commit: commit,
119122
issue: issue,
123+
existingEvaluations,
120124
})
121125

122126
if (!Result.isOk(messagesAndReasonWhyFailedForIssueResult)) {
@@ -197,31 +201,6 @@ export async function generateEvaluationConfigFromIssueWithCopilot(
197201
return Result.ok(evaluationConfigWithProviderAndModel)
198202
}
199203

200-
async function getExistingEvaluationNames({
201-
workspace,
202-
commit,
203-
issue,
204-
}: {
205-
workspace: Workspace
206-
commit: Commit
207-
issue: Issue
208-
}) {
209-
const evaluationsRepository = new EvaluationsV2Repository(workspace.id)
210-
const evaluationsFromSameCommitAndDocumentResult =
211-
await evaluationsRepository.listAtCommitByDocument({
212-
projectId: commit.projectId,
213-
commitUuid: commit.uuid,
214-
documentUuid: issue.documentUuid,
215-
})
216-
if (!Result.isOk(evaluationsFromSameCommitAndDocumentResult)) {
217-
return evaluationsFromSameCommitAndDocumentResult
218-
}
219-
const existingEvaluations =
220-
evaluationsFromSameCommitAndDocumentResult.unwrap()
221-
222-
return Result.ok(existingEvaluations.map((e) => e.name))
223-
}
224-
225204
export async function getSpansFromSpanAndTraceIdPairs({
226205
spanAndTraceIdPairs,
227206
workspace,

0 commit comments

Comments
 (0)