Commit 21d347a

Merge pull request #186 from e06084/dev
fix: fix Hallucination eval
2 parents: 1ed74b2 + 49b5bbe

5 files changed: +55 additions, -13 deletions

app_gradio/app.py

Lines changed: 1 addition & 1 deletion

@@ -438,4 +438,4 @@ def get_data_column_mapping():
)

# Launch the interface
-demo.launch()
+demo.launch(server_port=7861, share=True)
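
For context, a minimal standalone sketch of what the new launch call does; the toy Blocks app below is hypothetical and not taken from app_gradio/app.py.

import gradio as gr

# Hypothetical placeholder UI; the real app builds a full evaluation interface.
with gr.Blocks() as demo:
    gr.Markdown("Dingo evaluation demo placeholder")

if __name__ == "__main__":
    # server_port pins the app to 7861 instead of Gradio's default (7860);
    # share=True asks Gradio to create a temporary public share URL.
    demo.launch(server_port=7861, share=True)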

dingo/model/llm/llm_hallucination.py

Lines changed: 1 addition & 1 deletion

@@ -58,7 +58,7 @@ def build_messages(cls, input_data: Data) -> List:
# Format contexts for display
contexts_str = json.dumps(contexts, ensure_ascii=False, indent=2)

-prompt_content = cls.prompt.content % (question, response, contexts_str)
+prompt_content = cls.prompt.content.format(question, response, contexts_str)

messages = [{"role": "user", "content": prompt_content}]
return messages
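
The change above swaps %-style interpolation for str.format, so the prompt's placeholders change from %s to {} to match (see the next file). A quick sketch with a stand-in template, not the real prompt text:

# Stand-in template (hypothetical), showing the two call styles are equivalent
# once the placeholders are switched from %s to {}.
template_old = "Question: %s\nResponse: %s\nContexts: %s"
template_new = "Question: {}\nResponse: {}\nContexts: {}"

question, response, contexts_str = "Who won?", "Einstein.", '["context A"]'

assert template_old % (question, response, contexts_str) == \
       template_new.format(question, response, contexts_str)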

dingo/model/prompt/prompt_hallucination.py

Lines changed: 9 additions & 9 deletions

@@ -31,26 +31,26 @@ class PromptHallucination(BasePrompt):
Example actual output: "Einstein won the Nobel Prize in 1969 for his discovery of the photoelectric effect."

Example:
-{
+{{
    "verdicts": [
-       {
+       {{
            "verdict": "yes",
            "reason": "The actual output agrees with the provided context which states that Einstein won the Nobel Prize for his discovery of the photoelectric effect."
-       },
-       {
+       }},
+       {{
            "verdict": "no",
            "reason": "The actual output contradicts the provided context which states that Einstein won the Nobel Prize in 1968, not 1969."
-       }
+       }}
    ]
-}
+}}

You should NOT incorporate any prior knowledge you have and take each context at face value. Since you are going to generate a verdict for each context, the number of 'verdicts' SHOULD BE STRICTLY EQUAL TO the number of contexts provided.
You should FORGIVE cases where the actual output is lacking in detail, you should ONLY provide a 'no' answer if IT IS A CONTRADICTION.

**Input Data:**
-Question/Prompt: %s
-Response: %s
-Contexts: %s
+Question/Prompt: {}
+Response: {}
+Contexts: {}

Please evaluate the response against each context and return the verdicts in JSON format:
"""

examples/3h/3h_eval.py

Lines changed: 42 additions & 0 deletions

@@ -0,0 +1,42 @@
import os

from dingo.config import InputArgs
from dingo.exec import Executor

if __name__ == '__main__':
    OPENAI_MODEL = 'deepseek-chat'
    OPENAI_URL = 'https://api.deepseek.com/v1'
    OPENAI_KEY = os.getenv("OPENAI_KEY")

    input_data = {
        "input_path": "/Users/chupei/code/dingo/test/data/test_3h_jsonl.jsonl",
        "dataset": {
            "source": "local",
            "format": "jsonl",
            "field": {
                "prompt": "input",
                "content": "response",
                "context": "response"
            }
        },
        "executor": {
            "prompt_list": ["PromptTextHarmless", "PromptTextHelpful", "PromptTextHonest"],
            "result_save": {
                "bad": True,
                "good": True
            }
        },
        "evaluator": {
            "llm_config": {
                "LLMText3HHarmless": {
                    "model": OPENAI_MODEL,
                    "key": OPENAI_KEY,
                    "api_url": OPENAI_URL,
                }
            }
        }
    }
    input_args = InputArgs(**input_data)
    executor = Executor.exec_map["local"](input_args)
    result = executor.execute()
    print(result)
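
The example reads its key from the OPENAI_KEY environment variable; if it is unset, os.getenv returns None and the request would fail later. A small optional guard (not part of the file) that fails fast:

import os

# Hypothetical guard, not part of examples/3h/3h_eval.py.
if not os.getenv("OPENAI_KEY"):
    raise SystemExit("Set OPENAI_KEY before running examples/3h/3h_eval.py, "
                     "e.g. export OPENAI_KEY=<your key> in your shell.")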

examples/hallucination/dataset_hallucination_evaluation.py

Lines changed: 2 additions & 2 deletions

@@ -46,8 +46,8 @@ def evaluate_hallucination_jsonl_dataset():
        "llm_config": {
            "LLMHallucination": {
                "model": "deepseek-chat",
-                "key": "YOUR_API_KEY",
-                "api_url": "https://api.deepseek.com"
+                "key": "Your API Key",
+                "api_url": "https://api.deepseek.com/v1"
            }
        }
    }
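
For reference, a sketch of the corrected DeepSeek settings in context. The llm_config block comes from this diff; the enclosing "evaluator" key mirrors examples/3h/3h_eval.py above and is an assumption here, since the surrounding lines of this file are not shown.

import os

# Sketch only: the real file builds a fuller config around this block.
evaluator_config = {
    "evaluator": {
        "llm_config": {
            "LLMHallucination": {
                "model": "deepseek-chat",
                "key": os.getenv("OPENAI_KEY", "Your API Key"),
                # The /v1 suffix targets DeepSeek's OpenAI-compatible endpoint.
                "api_url": "https://api.deepseek.com/v1",
            }
        }
    }
}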
