Skip to content

Commit 610d8f1

Browse files
authored
Merge pull request #67 from ansible/chatbot_eval
PR to update e2e tests for the Ansible chatbot service, including model evaluation and response.
2 parents 16e7d1e + 3c3d95c commit 610d8f1

File tree

5 files changed

+26
-12
lines changed

5 files changed

+26
-12
lines changed

scripts/evaluation/driver.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1+
# ruff: noqa: I001
12
"""Driver for evaluation."""
23

34
import argparse
5+
from os import path
46
import sys
57

68
from httpx import Client
@@ -96,8 +98,11 @@ def main():
9698
client = Client(base_url=args.eval_api_url, verify=False) # noqa: S501
9799

98100
if "localhost" not in args.eval_api_url:
99-
with open(args.eval_api_token_file, mode="r", encoding="utf-8") as t_f:
100-
token = t_f.read().rstrip()
101+
if path.isfile(args.eval_api_token_file):
102+
with open(args.eval_api_token_file, mode="r", encoding="utf-8") as t_f:
103+
token = t_f.read().rstrip()
104+
else:
105+
token = args.eval_api_token_file
101106
client.headers.update({"Authorization": f"Bearer {token}"})
102107

103108
resp_eval = ResponseEvaluation(args, client)

scripts/evaluation/olsconfig.yaml

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,18 @@ llm_providers:
1717
models:
1818
- name: "mistral"
1919
- name: 'llama3.2:latest'
20-
- name: my_rhoai
20+
- name: my_rhoai_g3
2121
type: openai
22-
url: "https://granite3-8b-wisdom-model-staging.apps.stage2-west.v2dz.p1.openshiftapps.com/v1"
22+
url: "http://localhost:8000/v1"
2323
credentials_path: ols_api_key.txt
2424
models:
2525
- name: granite3-8b
26+
- name: my_rhoai_g31
27+
type: openai
28+
url: "http://localhost:8000/v1"
29+
credentials_path: rhoai_api_key.txt
30+
models:
31+
- name: granite3-1-8b
2632
ols_config:
2733
# max_workers: 1
2834
reference_content:
@@ -37,8 +43,10 @@ ols_config:
3743
app_log_level: info
3844
lib_log_level: warning
3945
uvicorn_log_level: info
40-
default_provider: ollama
41-
default_model: 'llama3.2:latest'
46+
# default_provider: my_rhoai_g31
47+
# default_model: 'granite3-1-8b'
48+
default_provider: my_rhoai_g3
49+
default_model: 'granite3-8b'
4250
query_validation_method: llm
4351
user_data_collection:
4452
feedback_disabled: false

scripts/evaluation/utils/constants.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
"azure_openai+gpt-4o": ("azure_openai", "gpt-4o"),
1212
"ollama+llama3.1:latest": ("ollama", "llama3.1:latest"),
1313
"ollama+mistral": ("ollama", "mistral"),
14-
"my_rhoai+granite3-8b": ("my_rhoai", "granite3-8b"),
15-
"my_rhoai3+granite3-1-8b": ("my_rhoai3", "granite3-1-8b"),
14+
"my_rhoai_g31+granite3-1-8b": ("my_rhoai_g31", "granite3-1-8b"),
15+
"my_rhoai_g3+granite3-8b": ("my_rhoai_g3", "granite3-8b"),
1616
}
1717

1818
NON_LLM_EVALS = {
@@ -44,7 +44,7 @@
4444
DEFAULT_RESULT_DIR = "eval_result"
4545

4646
# Retry settings for LLM calls, used when the model does not respond reliably in 100% of cases
47-
MAX_RETRY_ATTEMPTS = 10
47+
MAX_RETRY_ATTEMPTS = 5
4848
REST_API_TIMEOUT = 120
4949
TIME_TO_BREATH = 10
5050

scripts/evaluation/utils/prompts.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,12 @@
1616
1717
Use below json format for your response. Do not add any additional text apart from json output.
1818
{{
19-
Question: [
19+
"Question": [
2020
QUESTION 1,
2121
QUESTION 2,
22+
QUESTION 3
2223
],
23-
Valid: 0 or 1
24+
"Valid": 0 or 1
2425
}}
2526
2627
```

scripts/evaluation/utils/response.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def get_model_response(query, provider, model, mode, api_client=None):
1717
"""Get response depending upon the mode."""
1818
if mode == "ols":
1919
response = api_client.post(
20-
"/v1/query",
20+
"/api/v1/ai/chat/",
2121
json={
2222
"query": query,
2323
"provider": provider,

0 commit comments

Comments
 (0)