@@ -42,34 +42,40 @@ class LLMRAGAnswerRelevancy(BaseOpenAI):
4242 "source_frameworks" : "Ragas"
4343 }
4444
45- # 问题生成的prompt模板
46- question_generation_prompt = """为给定的答案生成一个问题,并判断该答案是否是非承诺性的。如果答案是非承诺性的,将noncommittal设为1;如果答案是承诺性的,将noncommittal设为0。非承诺性答案是指回避、模糊或模棱两可的回答。例如,"我不知道"或"我不确定"就是非承诺性答案。
47-
48- --------EXAMPLES-----------
49- 示例1
50- 输入: {{
51- "response": "爱因斯坦出生于德国。"
52- }}
53- 输出: {{
54- "question": "爱因斯坦出生于哪里?",
55- "noncommittal": 0
56- }}
57-
58- 示例2
59- 输入: {{
60- "response": "我不知道2023年发明的智能手机的突破性功能,因为我对2022年以后的信息不了解。"
61- }}
62- 输出: {{
63- "question": "2023年发明的智能手机的突破性功能是什么?",
64- "noncommittal": 1
65- }}
66- -----------------------------
67-
68- 现在对以下输入执行相同的操作。请尝试从不同角度生成问题,使用不同的表述方式,但保持与原答案的相关性。
69- 输入: {{
70- "response": {0}
71- }}
72- 输出: """
45+ question_generation_prompt = """Task: Generate a question for the given answer and identify if the answer is noncommittal.
46+
47+ Instructions:
48+ 1. Generate a single question that directly corresponds to the provided answer content.
49+ 2. Determine if the answer is noncommittal:
50+ - Set "noncommittal" to 1 if the answer is evasive, vague, or ambiguous (e.g., "I don't know", "I'm not sure")
51+ - Set "noncommittal" to 0 if the answer provides a clear, direct response
52+ 3. Ensure the generated question maintains a consistent language style throughout.
53+
54+ --------EXAMPLES-----------
55+ Example 1:
56+ Input: {{
57+ "response": "Albert Einstein was born in Germany."
58+ }}
59+ Output: {{
60+ "question": "Where was Albert Einstein born?",
61+ "noncommittal": 0
62+ }}
63+
64+ Example 2:
65+ Input: {{
66+ "response": "I don't know about the groundbreaking feature of the smartphone invented in 2023 as I'm unaware of information beyond 2022."
67+ }}
68+ Output: {{
69+ "question": "What was the groundbreaking feature of the smartphone invented in 2023?",
70+ "noncommittal": 1
71+ }}
72+ -----------------------------
73+
74+ Now perform the same with the following input:
75+ Input: {{
76+ "response": {0}
77+ }}
78+ Output: """
7379
7480 # 默认的embedding模型
7581 embedding_model = None
@@ -159,6 +165,10 @@ def calculate_similarity(cls, question: str, generated_questions: List[str]) ->
159165 if cls .embedding_model is None :
160166 cls .init_embedding_model ()
161167
168+ # 检查生成的问题是否为空列表或全为空字符串
169+ if not generated_questions or all (q == "" for q in generated_questions ):
170+ return np .array ([])
171+
162172 # 生成embedding
163173 # 单个查询的embedding
164174 question_response = cls .embedding_model ['client' ].embeddings .create (
@@ -179,15 +189,15 @@ def calculate_similarity(cls, question: str, generated_questions: List[str]) ->
179189 return np .dot (gen_question_vec , question_vec .T ).reshape (- 1 ) / norm
180190
181191 @classmethod
182- def calculate_score (cls , answers : List [Dict [str , Any ]], original_question : str ) -> float :
183- """计算答案相关性分数 """
192+ def calculate_score (cls , answers : List [Dict [str , Any ]], original_question : str ) -> tuple [ float , List [ Dict [ str , Any ]]] :
193+ """计算答案相关性分数并收集详细信息 """
184194 # 提取生成的问题
185195 gen_questions = [answer .get ("question" , "" ) for answer in answers ]
186196
187197 # 检查是否所有生成的问题都为空
188198 if all (q == "" for q in gen_questions ):
189199 log .warning ("Invalid response. Expected dictionary with key 'question'" )
190- return 0.0
200+ return 0.0 , []
191201
192202 # 检查是否所有答案都是不置可否的
193203 all_noncommittal = np .all ([answer .get ("noncommittal" , 0 ) for answer in answers ])
@@ -196,12 +206,25 @@ def calculate_score(cls, answers: List[Dict[str, Any]], original_question: str)
196206 cosine_sim = cls .calculate_similarity (original_question , gen_questions )
197207
198208 # 计算最终分数
199- score = cosine_sim .mean () * int (not all_noncommittal )
200-
201- # 转换为0-10的分数范围
202- score = float (score * 10 )
203-
204- return score
209+ if len (cosine_sim ) == 0 :
210+ score = 0.0
211+ else :
212+ score = cosine_sim .mean () * int (not all_noncommittal )
213+ # 转换为0-10的分数范围
214+ score = float (score * 10 )
215+
216+ # 收集详细信息
217+ details = []
218+ for i , (answer , question , sim ) in enumerate (zip (answers , gen_questions , cosine_sim )):
219+ is_noncommittal = answer .get ("noncommittal" , 0 ) == 1
220+ details .append ({
221+ "question_index" : i + 1 ,
222+ "generated_question" : question ,
223+ "similarity_score" : sim ,
224+ "is_noncommittal" : is_noncommittal
225+ })
226+
227+ return score , details
205228
206229 @classmethod
207230 def eval (cls , input_data : Data ) -> EvalDetail :
@@ -230,8 +253,8 @@ def eval(cls, input_data: Data) -> EvalDetail:
230253 # 生成多个相关问题
231254 generated_questions = cls .generate_multiple_questions (input_data , cls .strictness )
232255
233- # 计算相关性分数
234- score = cls .calculate_score (generated_questions , original_question )
256+ # 计算相关性分数和详细信息
257+ score , details = cls .calculate_score (generated_questions , original_question )
235258
236259 # 构建结果
237260 result = EvalDetail (metric = cls .__name__ )
@@ -249,14 +272,24 @@ def eval(cls, input_data: Data) -> EvalDetail:
249272 if embedding_model_name :
250273 cls .init_embedding_model (embedding_model_name )
251274
275+ # 构建详细的reason文本
276+ all_reasons = []
277+ for detail in details :
278+ noncommittal_text = "(不置可否的回答)" if detail ["is_noncommittal" ] else ""
279+ all_reasons .append (f"生成的问题{ detail ['question_index' ]} : { detail ['generated_question' ]} { noncommittal_text } \n 与原始问题的相似度: { detail ['similarity_score' ]:.4f} " )
280+
281+ reason_text = "\n \n " .join (all_reasons )
282+ if details :
283+ reason_text += f"\n \n 平均相似度: { np .mean ([d ['similarity_score' ] for d in details ]):.4f} \n 是否所有回答都不置可否: { '是' if np .all ([d ['is_noncommittal' ] for d in details ]) else '否' } "
284+
252285 if score >= threshold :
253286 result .status = False
254287 result .label = ["QUALITY_GOOD.ANSWER_RELEVANCY_PASS" ]
255- result .reason = [f"答案相关性评估通过 (分数: { score :.2f} /10)" ]
288+ result .reason = [f"答案相关性评估通过 (分数: { score :.2f} /10)\n { reason_text } " ]
256289 else :
257290 result .status = True
258291 result .label = ["QUALITY_BAD.ANSWER_RELEVANCY_FAIL" ]
259- result .reason = [f"答案相关性评估未通过 (分数: { score :.2f} /10)" ]
292+ result .reason = [f"答案相关性评估未通过 (分数: { score :.2f} /10)\n { reason_text } " ]
260293
261294 return result
262295
0 commit comments