diff --git a/runner/eval-cli.ts b/runner/eval-cli.ts
index 828fcf3..78fdb77 100644
--- a/runner/eval-cli.ts
+++ b/runner/eval-cli.ts
@@ -1,6 +1,5 @@
 import { Arguments, Argv, CommandModule } from 'yargs';
 import chalk from 'chalk';
-import { assertValidModelName, LlmRunner } from './codegen/llm-runner.js';
 import {
   BUILT_IN_ENVIRONMENTS,
   DEFAULT_AUTORATER_MODEL_NAME,
@@ -11,8 +10,7 @@ import {
   logReportToConsole,
   writeReportToDisk,
 } from './reporting/report-logging.js';
-import { getRunnerByName, RunnerName } from './codegen/runner-creation.js';
-import { GenkitRunner } from './codegen/genkit/genkit-runner.js';
+import { RunnerName } from './codegen/runner-creation.js';
 import { UserFacingError } from './utils/errors.js';
 
 export const EvalModule = {
@@ -166,9 +164,6 @@ function builder(argv: Argv): Argv {
 }
 
 async function handler(cliArgs: Arguments): Promise<void> {
-  let llm: LlmRunner | null = null;
-  let ratingLlm: GenkitRunner | null = null;
-
   if (!cliArgs.environment) {
     console.error(
       chalk.red(
@@ -184,9 +179,7 @@
   }
 
   try {
-    ratingLlm = await getRunnerByName('genkit');
     const runInfo = await generateCodeAndAssess({
-      ratingLlm,
       runner: cliArgs.runner,
       model: cliArgs.model,
       environmentConfigPath:
@@ -222,7 +215,5 @@
         console.error(chalk.red((error as Error).stack));
       }
     }
-  } finally {
-    await ratingLlm?.dispose();
   }
 }
diff --git a/runner/orchestration/generate.ts b/runner/orchestration/generate.ts
index 90104db..e5c2347 100644
--- a/runner/orchestration/generate.ts
+++ b/runner/orchestration/generate.ts
@@ -9,7 +9,6 @@ import {
   assertValidModelName,
   LlmGenerateFilesContext,
   LlmGenerateFilesResponse,
-  LlmRunner,
 } from '../codegen/llm-runner.js';
 import {
   DEFAULT_AUTORATER_MODEL_NAME,
@@ -54,9 +53,8 @@ import { UserFacingError } from '../utils/errors.js';
 import { getRunGroupId } from './grouping.js';
 import { executeCommand } from '../utils/exec.js';
 import { EvalID, Gateway } from './gateway.js';
-import { LocalGateway } from './gateways/local_gateway.js';
 import { LocalEnvironment } from '../configuration/environment-local.js';
-import { RunnerName } from '../codegen/runner-creation.js';
+import { getRunnerByName, RunnerName } from '../codegen/runner-creation.js';
 
 /**
  * Orchestrates the entire assessment process for each prompt defined in the `prompts` array.
@@ -71,7 +69,6 @@
  * each containing the prompt, generated code, and final validation status.
  */
 export async function generateCodeAndAssess(options: {
-  ratingLlm: GenkitRunner;
   model: string;
   runner: RunnerName;
   environmentConfigPath: string;
@@ -96,6 +93,7 @@
     options.environmentConfigPath,
     options.runner
   );
+  const ratingLlm = await getRunnerByName('genkit');
 
   // TODO(devversion): Consider validating model names also for remote environments.
   if (env instanceof LocalEnvironment) {
@@ -179,7 +177,7 @@
         evalID,
         env,
         env.gateway,
-        options.ratingLlm,
+        ratingLlm,
         options.model,
         rootPromptDef,
         options.localMode,
@@ -254,7 +252,7 @@
   const timestamp = new Date();
   const details = {
     summary: await prepareSummary(
-      options.ratingLlm,
+      ratingLlm,
       new AbortController().signal, // Note: AI summarization is currently not abortable.
       options.model,
       env,