feat: improve multi-model evaluation reliability

tychenjiajun · tychenjiajun · commit 9d9340c2d7df · 2025-06-07T22:50:07.000+08:00
- Refactor generation evaluation to try models sequentially until success
- Enhance AI provider setup to support multiple models with index selection
- Simplify pre-commit hook to invoke lint-staged directly instead of through npx
- Bump version to 2.0.2
diff --git a/.husky/pre-commit b/.husky/pre-commit
@@ -1 +1 @@
-npx lint-staged
+lint-staged
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "ai-pp3",
-  "version": "2.0.1",
+  "version": "2.0.2",
   "description": "CLI tool combining multimodal AI analysis with RawTherapee's engine to generate optimized PP3 profiles for RAW photography",
   "engines": {
     "node": ">=18"
diff --git a/src/ai-generation/ai-processor.ts b/src/ai-generation/ai-processor.ts
@@ -269,44 +269,44 @@ export function parseBestGenerationIndex(
 }
 
 /**
- * Evaluates multiple generations and selects the best one
+ * Handles the case when there's only one successful generation
  */
-export async function evaluateGenerations(
+function handleSingleGeneration(
+  successfulResults: GenerationResult[],
   generationResults: GenerationResult[],
+): { bestIndex: number; evaluationReason: string } {
+  const originalIndex = generationResults.indexOf(successfulResults[0]);
+  return {
+    bestIndex: originalIndex,
+    evaluationReason: "Only one successful generation available",
+  };
+}
+
+/**
+ * Attempts to evaluate generations with a specific model
+ */
+async function attemptEvaluationWithModel(
+  model: string,
+  modelIndex: number,
+  models: string[],
   providerName: string,
-  visionModel: string | string[],
+  imageContents: (
+    | { type: "text"; text: string }
+    | { type: "image"; image: Buffer }
+  )[],
   maxRetries: number,
+  successfulResults: GenerationResult[],
+  generationResults: GenerationResult[],
   verbose: boolean,
-): Promise<{ bestIndex: number; evaluationReason: string }> {
-  // Filter out failed generations
-  const successfulResults = generationResults.filter(
-    (result) => result.success,
-  );
-
-  if (successfulResults.length === 0) {
-    throw new Error("No successful generations to evaluate");
-  }
-
-  if (successfulResults.length === 1) {
-    // Find the index of this successful result in the original array
-    const originalIndex = generationResults.indexOf(successfulResults[0]);
-    return {
-      bestIndex: originalIndex,
-      evaluationReason: "Only one successful generation available",
-    };
-  }
-
-  const aiProvider = handleProviderSetup(providerName, visionModel);
-  // Only pass successful generations to prepareImageContents
-  const imageContents = await prepareImageContents(successfulResults, verbose);
-
-  if (verbose) {
+): Promise<{ bestIndex: number; evaluationReason: string } | null> {
+  if (verbose && models.length > 1) {
     console.log(
-      `Evaluating ${String(successfulResults.length)} successful generations with AI...`,
+      `Attempting evaluation with model ${model} (${String(modelIndex + 1)}/${String(models.length)})...`,
     );
   }
 
   try {
+    const aiProvider = handleProviderSetup(providerName, model);
     const response = await generateText({
       model: aiProvider,
       messages: [
@@ -343,18 +343,110 @@ export async function evaluateGenerations(
       bestIndex: finalIndex,
       evaluationReason: responseText,
     };
-  } catch (error) {
-    if (verbose) {
-      console.warn("AI evaluation failed, using first generation:", error);
-    }
-    // Find the index of the first successful generation in the original array
-    const firstSuccessfulIndex = generationResults.findIndex(
-      (result) => result.success,
+  } catch {
+    // The error itself is discarded; null tells the caller this model failed
+    return null;
+  }
+}
+
+/**
+ * Handles the fallback case when all models fail
+ */
+function handleAllModelsFailed(
+  generationResults: GenerationResult[],
+  lastError: unknown,
+  verbose: boolean,
+): { bestIndex: number; evaluationReason: string } {
+  if (verbose) {
+    console.warn(
+      "All AI evaluation models failed, using first generation as fallback:",
+      lastError,
     );
+  }
 
-    return {
-      bestIndex: Math.max(firstSuccessfulIndex, 0),
-      evaluationReason: `AI evaluation failed: ${error instanceof Error ? error.message : "Unknown error"}. Using first successful generation as fallback.`,
-    };
+  // Find the index of the first successful generation in the original array
+  const firstSuccessfulIndex = generationResults.findIndex(
+    (result) => result.success,
+  );
+
+  return {
+    bestIndex: Math.max(firstSuccessfulIndex, 0),
+    evaluationReason: `AI evaluation failed: ${lastError instanceof Error ? lastError.message : "Unknown error"}. Using first successful generation as fallback.`,
+  };
+}
+
+/**
+ * Evaluates multiple generations and selects the best one
+ * Models are tried in order until one succeeds; if all fail, the first successful generation is used as a fallback
+ */
+export async function evaluateGenerations(
+  generationResults: GenerationResult[],
+  providerName: string,
+  visionModel: string | string[],
+  maxRetries: number,
+  verbose: boolean,
+): Promise<{ bestIndex: number; evaluationReason: string }> {
+  // Filter out failed generations
+  const successfulResults = generationResults.filter(
+    (result) => result.success,
+  );
+
+  if (successfulResults.length === 0) {
+    throw new Error("No successful generations to evaluate");
+  }
+
+  if (successfulResults.length === 1) {
+    return handleSingleGeneration(successfulResults, generationResults);
   }
+
+  // Only pass successful generations to prepareImageContents
+  const imageContents = await prepareImageContents(successfulResults, verbose);
+
+  if (verbose) {
+    console.log(
+      `Evaluating ${String(successfulResults.length)} successful generations with AI...`,
+    );
+  }
+
+  // Convert visionModel to array for sequential attempts
+  const models = Array.isArray(visionModel) ? visionModel : [visionModel];
+  let lastError: unknown = null;
+
+  // Try each model sequentially until one succeeds
+  for (let modelIndex = 0; modelIndex < models.length; modelIndex++) {
+    const currentModel = models[modelIndex];
+
+    const result = await attemptEvaluationWithModel(
+      currentModel,
+      modelIndex,
+      models,
+      providerName,
+      imageContents,
+      maxRetries,
+      successfulResults,
+      generationResults,
+      verbose,
+    );
+
+    if (result) {
+      return result;
+    } else {
+      // Handle error and try next model if available
+      const error = new Error(`Evaluation with model ${currentModel} failed`);
+      lastError = error;
+
+      if (verbose) {
+        console.warn(
+          `AI evaluation with model ${currentModel} failed: ${error.message}`,
+        );
+      }
+
+      if (modelIndex < models.length - 1 && verbose) {
+        console.log(`Trying next model: ${models[modelIndex + 1]}...`);
+      }
+    }
+  }
+
+  // If we get here, all models failed
+  return handleAllModelsFailed(generationResults, lastError, verbose);
 }
diff --git a/src/utils/ai-provider.ts b/src/utils/ai-provider.ts
@@ -4,13 +4,23 @@
 
 import { provider } from "../provider.js";
 
+/**
+ * Sets up the AI provider with the specified model
+ *
+ * @param providerName The name of the AI provider to use
+ * @param model A single model name or an array of model names
+ * @param modelIndex Optional index of the model to use when an array is given (defaults to 0; ignored for a single model name)
+ * @returns The configured AI provider
+ */
 export function handleProviderSetup(
   providerName: string,
   model: string | string[],
+  modelIndex?: number,
 ) {
   try {
-    // If model is an array, use the first model in the array
-    const modelToUse = Array.isArray(model) ? model[0] : model;
+    // If model is an array, use the specified model index or default to the first model
+    const modelToUse = Array.isArray(model) ? model[modelIndex ?? 0] : model;
+
     return provider(providerName)(modelToUse);
   } catch (error: unknown) {
     throw new Error(

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "ai-pp3",`
`3`		`- "version": "2.0.1",`
	`3`	`+ "version": "2.0.2",`
`4`	`4`	`"description": "CLI tool combining multimodal AI analysis with RawTherapee's engine to generate optimized PP3 profiles for RAW photography",`
`5`	`5`	`"engines": {`
`6`	`6`	`"node": ">=18"`