Skip to content

Commit 99ab997

Browse files
committed
Fixes
1 parent cecaa40 commit 99ab997

File tree

2 files changed

+15
-2
lines changed

2 files changed

+15
-2
lines changed

src/lib/inngest/functions.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ async function _pollTaskUntilFinished({ testRunId }: { testRunId: number }) {
175175
})
176176
.where(eq(schema.testRun.id, dbTestRun.id))
177177

178+
// NOTE: Here we update all steps at once and mark them as passed.
178179
await tx
179180
.update(schema.testRunStep)
180181
.set({
@@ -196,6 +197,7 @@ async function _pollTaskUntilFinished({ testRunId }: { testRunId: number }) {
196197
})
197198
.where(eq(schema.testRun.id, dbTestRun.id))
198199

200+
// NOTE: We manually check each step to see if it was performed as expected.
199201
for (const step of dbTestRun.testRunSteps) {
200202
// TODO: Unify step ID types!
201203
const passed = taskResult.steps?.find((s) => s.id === `${step.stepId}`)
@@ -205,12 +207,12 @@ async function _pollTaskUntilFinished({ testRunId }: { testRunId: number }) {
205207
.set({
206208
status: passed ? 'passed' : 'failed',
207209
})
208-
.where(eq(schema.testRunStep.testRunId, dbTestRun.id))
210+
.where(eq(schema.testRunStep.id, step.id))
209211
}
210212
})
211213
}
212214

213-
return { ok: true }
215+
return { ok: true, data: buTaskResponse.data.output }
214216
}
215217

216218
case 'running':
@@ -241,6 +243,7 @@ async function _pollTaskUntilFinished({ testRunId }: { testRunId: number }) {
241243

242244
return {
243245
ok: false,
246+
data: buTaskResponse.data.output,
244247
}
245248
}
246249

src/lib/testing/engine.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ You'll be given a task description, steps, and success criteria. You need to
9696
9797
- Perform the steps in the exact order they are given.
9898
- Do not search for potential fixes or workarounds.
99+
- Keep explicit track (e.g. in a list) of the steps you have performed in your actions.
99100
100101
101102
# Success and Failure Criteria for Steps
@@ -120,6 +121,12 @@ For example:
120121
121122
# Response Format
122123
124+
Return a JSON object with the following format:
125+
126+
\`\`\`json
127+
${JSON.stringify(RESPONSE_JSON_SCHEMA, null, 2)}
128+
\`\`\`
129+
123130
Return \`{ status: "pass", steps: undefined, error: undefined }\` if you can successfully perform the task.
124131
125132
Return \`{ status: "failing", steps: [ { id: <number>, description: "<action that was taken>" } ], error: "<error message>" }\` if you cannot successfully perform the task. The steps array contains exactly the steps that were successfully performed and nothing more. If you cannot perform a step, the error message contains information about why the step failed. If the final state does not match the success criteria, the error message is a short but detailed description of how the actual application state differs from the expected application state and success criteria.
@@ -129,6 +136,7 @@ Additionally:
129136
- DO NOT INCLUDE ANY OTHER TEXT IN YOUR RESPONSE.
130137
- CORRECTLY CHOOSE THE ID FOR EACH STEP.
131138
- STEPS NEED TO BE RETURNED IN THE EXACT ORDER THEY WERE GIVEN.
139+
- STRICTLY FOLLOW THE RESPONSE FORMAT DEFINED ABOVE!
132140
133141
# Prompt Format
134142
@@ -182,6 +190,8 @@ The task will be given in the following format:
182190
\`\`\`
183191
{ "status": "failing", "steps": [ { "id": "1", "description": "Go to the search page" } ], "error": "The search page is not found" }
184192
\`\`\`
193+
194+
185195
`
186196

187197
function stringifyTest(test: TestDefinition) {

0 commit comments

Comments
 (0)