Add support for general LLMs in FactCheckingEvaluator

markpollack · markpollack · commit f92a3f0fcb57 · 2024-12-19T15:14:18.000-05:00
The FactCheckingEvaluator was initially designed for the Bespoke Minicheck
model, which doesn't require explicit instructions. This change adds support
for general-purpose LLMs like Claude and GPT-4 that need clear evaluation
instructions in their prompts.

Key changes:
- Introduce separate prompts for Bespoke and general LLMs
- Add factory method for Bespoke Minicheck configuration
- Make evaluation prompt configurable via constructor

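This maintains backward compatibility at the API level while enabling the
evaluator to work effectively with any LLM implementation. Note that the
single-argument constructor now uses the general-purpose prompt; callers that
target Bespoke Minicheck should use FactCheckingEvaluator.forBespokeMinicheck(...)
to keep the original instruction-free prompt.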
diff --git a/spring-ai-core/src/main/java/org/springframework/ai/evaluation/FactCheckingEvaluator.java b/spring-ai-core/src/main/java/org/springframework/ai/evaluation/FactCheckingEvaluator.java
@@ -64,19 +64,46 @@
 public class FactCheckingEvaluator implements Evaluator {
 
 	private static final String DEFAULT_EVALUATION_PROMPT_TEXT = """
+				Evaluate whether or not the following claim is supported by the provided document.
+				Respond with "yes" if the claim is supported, or "no" if it is not.
+				Document: \\n {document}\\n
+				Claim: \\n {claim}
+			""";
+
+	private static final String BESPOKE_EVALUATION_PROMPT_TEXT = """
 				Document: \\n {document}\\n
 				Claim: \\n {claim}
 			""";
 
 	private final ChatClient.Builder chatClientBuilder;
+	private final String evaluationPrompt;
 
 	/**
 	 * Constructs a new FactCheckingEvaluator with the provided ChatClient.Builder.
-	 * @param chatClientBuilder The builder for the ChatClient used to perform the
-	 * evaluation
+	 * Uses the default evaluation prompt suitable for general-purpose LLMs.
+	 * @param chatClientBuilder The builder for the ChatClient used to perform the evaluation
 	 */
 	public FactCheckingEvaluator(ChatClient.Builder chatClientBuilder) {
+		this(chatClientBuilder, DEFAULT_EVALUATION_PROMPT_TEXT);
+	}
+
+	/**
+	 * Constructs a new FactCheckingEvaluator with the provided ChatClient.Builder and evaluation prompt.
+	 * @param chatClientBuilder The builder for the ChatClient used to perform the evaluation
+	 * @param evaluationPrompt The evaluation prompt text, with {@code {document}} and {@code {claim}} placeholders
+	 */
+	public FactCheckingEvaluator(ChatClient.Builder chatClientBuilder, String evaluationPrompt) {
 		this.chatClientBuilder = chatClientBuilder;
+		this.evaluationPrompt = evaluationPrompt;
+	}
+
+	/**
+	 * Creates a FactCheckingEvaluator configured for use with the Bespoke Minicheck model.
+	 * @param chatClientBuilder The builder for the ChatClient used to perform the evaluation
+	 * @return A FactCheckingEvaluator configured for Bespoke Minicheck
+	 */
+	public static FactCheckingEvaluator forBespokeMinicheck(ChatClient.Builder chatClientBuilder) {
+		return new FactCheckingEvaluator(chatClientBuilder, BESPOKE_EVALUATION_PROMPT_TEXT);
 	}
 
 	/**
@@ -94,7 +121,7 @@ public EvaluationResponse evaluate(EvaluationRequest evaluationRequest) {
 
 		String evaluationResponse = this.chatClientBuilder.build()
 			.prompt()
-			.user(userSpec -> userSpec.text(DEFAULT_EVALUATION_PROMPT_TEXT)
+			.user(userSpec -> userSpec.text(evaluationPrompt)
 				.param("document", context)
 				.param("claim", response))
 			.call()