Skip to content

Commit f92a3f0

Browse files
committed
Add support for general LLMs in FactCheckingEvaluator
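The FactCheckingEvaluator was initially designed for the Bespoke Minicheck model, which doesn't require explicit instructions. This change adds support for general-purpose LLMs, such as Claude and GPT-4, that need clear evaluation instructions in their prompts.

Key changes:
- Introduce separate prompts for Bespoke Minicheck and for general-purpose LLMs.
- Add a factory method, forBespokeMinicheck(...), for the Bespoke Minicheck configuration.
- Make the evaluation prompt configurable via a new two-argument constructor.

This keeps the existing single-argument constructor (so the API remains backward compatible) while enabling the evaluator to work effectively with any LLM implementation. Note that the single-argument constructor now uses the general-purpose prompt by default; callers targeting Bespoke Minicheck should use the forBespokeMinicheck(...) factory method to get the original instruction-free prompt.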
1 parent 95eb9fd commit f92a3f0

File tree

1 file changed

+30
-3
lines changed

1 file changed

+30
-3
lines changed

spring-ai-core/src/main/java/org/springframework/ai/evaluation/FactCheckingEvaluator.java

Lines changed: 30 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -64,19 +64,46 @@
6464
public class FactCheckingEvaluator implements Evaluator {
6565

6666
private static final String DEFAULT_EVALUATION_PROMPT_TEXT = """
67+
Evaluate whether or not the following claim is supported by the provided document.
68+
Respond with "yes" if the claim is supported, or "no" if it is not.
69+
Document: \\n {document}\\n
70+
Claim: \\n {claim}
71+
""";
72+
73+
private static final String BESPOKE_EVALUATION_PROMPT_TEXT = """
6774
Document: \\n {document}\\n
6875
Claim: \\n {claim}
6976
""";
7077

7178
private final ChatClient.Builder chatClientBuilder;
79+
private final String evaluationPrompt;
7280

7381
/**
7482
* Constructs a new FactCheckingEvaluator with the provided ChatClient.Builder.
75-
* @param chatClientBuilder The builder for the ChatClient used to perform the
76-
* evaluation
83+
* Uses the default evaluation prompt suitable for general purpose LLMs.
84+
* @param chatClientBuilder The builder for the ChatClient used to perform the evaluation
7785
*/
7886
public FactCheckingEvaluator(ChatClient.Builder chatClientBuilder) {
87+
this(chatClientBuilder, DEFAULT_EVALUATION_PROMPT_TEXT);
88+
}
89+
90+
/**
91+
* Constructs a new FactCheckingEvaluator with the provided ChatClient.Builder and evaluation prompt.
92+
* @param chatClientBuilder The builder for the ChatClient used to perform the evaluation
93+
* @param evaluationPrompt The prompt text to use for evaluation
94+
*/
95+
public FactCheckingEvaluator(ChatClient.Builder chatClientBuilder, String evaluationPrompt) {
7996
this.chatClientBuilder = chatClientBuilder;
97+
this.evaluationPrompt = evaluationPrompt;
98+
}
99+
100+
/**
101+
* Creates a FactCheckingEvaluator configured for use with the Bespoke Minicheck model.
102+
* @param chatClientBuilder The builder for the ChatClient used to perform the evaluation
103+
* @return A FactCheckingEvaluator configured for Bespoke Minicheck
104+
*/
105+
public static FactCheckingEvaluator forBespokeMinicheck(ChatClient.Builder chatClientBuilder) {
106+
return new FactCheckingEvaluator(chatClientBuilder, BESPOKE_EVALUATION_PROMPT_TEXT);
80107
}
81108

82109
/**
@@ -94,7 +121,7 @@ public EvaluationResponse evaluate(EvaluationRequest evaluationRequest) {
94121

95122
String evaluationResponse = this.chatClientBuilder.build()
96123
.prompt()
97-
.user(userSpec -> userSpec.text(DEFAULT_EVALUATION_PROMPT_TEXT)
124+
.user(userSpec -> userSpec.text(evaluationPrompt)
98125
.param("document", context)
99126
.param("claim", response))
100127
.call()

0 commit comments

Comments
 (0)