Commit ad0986f

Merge pull request #9 from jpodivin/new_prompt

New prompt

2 parents: b8bf6c3 + 888c57b

File tree: 1 file changed, +25 −20 lines

validation.py

Lines changed: 25 additions & 20 deletions
@@ -41,30 +41,21 @@ def get_similarity_score(
     """

     prompt = f"""
-You are an AI performance evaluator. Your task is to compare two text snippets and rate their similarity on a scale of 1 to 10, where 1 is completely dissimilar and 10 is identical or semantically equivalent.
-Provide only the integer score in your response.
+Analyze the semantic similarity between the 'expected_output' and the 'actual_output'.

-Expected Response:
----
-{expected_text}
----
+Your task is to rate their similarity on an integer scale from 1 to 10.
+- A score of 1 means they are completely dissimilar in meaning, topic, and intent.
+- **A score of 7-9 means the actual output contains all the critical information of the expected output, but also includes additional, relevant explanations or details.**
+- A score of 10 means they are semantically identical, conveying the exact same information and intent, even if phrasing differs.

-Actual Response:
----
-{actual_text}
 ---
-
-Similarity Score (1-10):
+"expected_output": "{expected_text}"
+"actual_output": "{actual_text}"
     """
-
     try:
         response = llm_client.chat.completions.create(
             model=llm_model,
             messages=[
-                {
-                    "role": "system",
-                    "content": "You are a helpful assistant that provides similarity scores.",
-                },
                 {"role": "user", "content": prompt},
             ],
             response_format={
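The rewrite above folds the grading rubric into a single user message and drops the old system message entirely; the hunk cuts off at response_format={, so the structured-output schema the commit pairs with this prompt is not visible here. Below is a minimal sketch of exercising the new prompt against an OpenAI-compatible endpoint, assuming a bare-integer reply; the client setup, base_url/api_key values, and the judge_similarity helper are illustrative, not part of the commit.

# Illustrative harness only; the commit's real call site is get_similarity_score().
# The base_url/api_key values and the bare-int parse below are assumptions.
from openai import OpenAI

llm_client = OpenAI(base_url="http://localhost:8000/v1", api_key="dummy-token")

def judge_similarity(expected_text: str, actual_text: str, llm_model: str) -> int:
    prompt = f"""
Analyze the semantic similarity between the 'expected_output' and the 'actual_output'.

Your task is to rate their similarity on an integer scale from 1 to 10.

---
"expected_output": "{expected_text}"
"actual_output": "{actual_text}"
"""
    response = llm_client.chat.completions.create(
        model=llm_model,
        messages=[{"role": "user", "content": prompt}],
    )
    # Assumes the judge answers with only the integer; the commit may instead
    # enforce this through the (truncated) response_format schema.
    return int(response.choices[0].message.content.strip())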
@@ -95,7 +86,11 @@ def get_similarity_score(


 def evaluate_samples(
-    directory: str, server_address: str, llm_url: str, llm_model: str, llm_token: str,
+    directory: str,
+    server_address: str,
+    llm_url: str,
+    llm_model: str,
+    llm_token: str,
     log_detective_api_timeout: int,
 ) -> None:
     """
@@ -141,13 +136,18 @@ def evaluate_samples(
             print(
                 f"Calling Log Detective API: {full_api_url} with log file URL: {log_file_url}"
             )
-            api_response = requests.post(full_api_url, json=payload, timeout=log_detective_api_timeout)
+            api_response = requests.post(
+                full_api_url, json=payload, timeout=log_detective_api_timeout
+            )
             api_response.raise_for_status()
             actual_response_data = api_response.json()
             # Extract the text from the 'explanation' object based on the provided schema
             actual_issue = actual_response_data["explanation"]["text"]
         except requests.exceptions.RequestException as e:
-            print(f"Error calling Log Detective API for {log_file_url}: {e}", file=sys.stderr)
+            print(
+                f"Error calling Log Detective API for {log_file_url}: {e}",
+                file=sys.stderr,
+            )
             continue
         except ValueError:
             print(
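A note on the error path this hunk reflows: requests.exceptions.Timeout (raised when the timeout=log_detective_api_timeout budget is exceeded) and the HTTPError raised by raise_for_status() both subclass RequestException, so the single except clause covers connection failures, timeouts, and HTTP error statuses alike. A standalone check, using a deliberately unroutable address that is not from the commit:

# Demonstrates that Timeout/ConnectionError/HTTPError all land in the same
# except clause; the URL is intentionally unreachable and purely illustrative.
import sys
import requests

try:
    r = requests.post("http://10.255.255.1/analyze", json={}, timeout=1)
    r.raise_for_status()
except requests.exceptions.RequestException as e:
    print(f"request failed: {e}", file=sys.stderr)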
@@ -199,7 +199,12 @@ def main():
     )
     parser.add_argument("llm_url", help="URL of LLM API to use as judge")
     parser.add_argument("llm_model", help="Name of LLM model to use as judge")
-    parser.add_argument("log_detective_api_timeout", help="Request timeout for Log Detective API", type=int, default=60)
+    parser.add_argument(
+        "log_detective_api_timeout",
+        help="Request timeout for Log Detective API",
+        type=int,
+        default=60,
+    )
     args = parser.parse_args()

     if not API_KEY:
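One argparse subtlety in this last hunk: a positional argument stays required unless it is declared with nargs="?", so the default=60 shown here only ever applies if the parser also sets nargs somewhere not visible in the diff. A small sketch of the optional-positional form, assuming a standalone parser:

# Standalone sketch; the real parser in validation.py defines more arguments.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "log_detective_api_timeout",
    help="Request timeout for Log Detective API",
    type=int,
    nargs="?",  # without this, the positional is required and default=60 is inert
    default=60,
)
args = parser.parse_args([])           # nothing on the command line...
print(args.log_detective_api_timeout)  # ...prints 60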
