
Commit c3dea1d

Merge pull request #115 from Azure-Samples/installable
Don't misuse the model configuration to store extra keys
2 parents 28f46a0 + a6a323e commit c3dea1d

3 files changed: +32 -15 lines


src/evaltools/eval/evaluate.py

Lines changed: 16 additions & 4 deletions
@@ -1,5 +1,6 @@
 import json
 import logging
+import os
 import time
 from pathlib import Path
 
@@ -94,6 +95,8 @@ def run_evaluation(
     num_questions=None,
     target_response_answer_jmespath=None,
     target_response_context_jmespath=None,
+    model=None,
+    azure_credential=None,
 ):
     logger.info("Running evaluation using data from %s", testdata_path)
     testdata = load_jsonl(testdata_path)
@@ -123,8 +126,8 @@ def run_evaluation(
         return False
 
     logger.info("Sending a test chat completion to the GPT deployment to ensure it is running...")
-    gpt_response = service_setup.get_openai_client(openai_config).chat.completions.create(
-        model=openai_config["model"],
+    gpt_response = service_setup.get_openai_client(openai_config, azure_credential).chat.completions.create(
+        model=model,
         messages=[{"role": "user", "content": "Hello!"}],
         n=1,
     )
@@ -190,7 +193,7 @@ def evaluate_row(row):
 
     with open(results_dir / "evaluate_parameters.json", "w", encoding="utf-8") as parameters_file:
         parameters = {
-            "evaluation_gpt_model": openai_config["model"],
+            "evaluation_gpt_model": model,
            "evaluation_timestamp": int(time.time()),
            "testdata_path": str(testdata_path),
            "target_url": target_url,
@@ -221,7 +224,14 @@ def process_config(obj: dict):
 
 
 def run_evaluate_from_config(
-    working_dir, config_path, num_questions=None, target_url=None, results_dir=None, openai_config=None
+    working_dir,
+    config_path,
+    num_questions=None,
+    target_url=None,
+    results_dir=None,
+    openai_config=None,
+    model=None,
+    azure_credential=None,
 ):
     config_path = working_dir / Path(config_path)
     logger.info("Running evaluation from config %s", config_path)
@@ -245,6 +255,8 @@ def run_evaluate_from_config(
         ),
         target_response_answer_jmespath=config.get("target_response_answer_jmespath", "message.content"),
        target_response_context_jmespath=config.get("target_response_context_jmespath", "context.data_points.text"),
+        model=model or os.environ["OPENAI_GPT_MODEL"],
+        azure_credential=azure_credential,
     )
 
     if evaluation_run_complete:
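
This is the caller-facing effect of the change: the evaluation model and the Azure credential are now explicit arguments to run_evaluate_from_config and run_evaluation rather than extra keys stored in openai_config. A minimal sketch of how a caller might invoke it after this commit; the import path is inferred from the file location, and the config file and model name are placeholders, not part of this commit:

```python
from pathlib import Path

from azure.identity import DefaultAzureCredential

# Import path inferred from src/evaltools/eval/evaluate.py; adjust if packaged differently.
from evaltools.eval.evaluate import run_evaluate_from_config

run_evaluate_from_config(
    working_dir=Path.cwd(),
    config_path="example_config.json",   # placeholder config file
    num_questions=10,
    model="gpt-4o",                      # if omitted, falls back to the OPENAI_GPT_MODEL env var
    azure_credential=DefaultAzureCredential(),  # forwarded to get_openai_client
)
```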

src/evaltools/review/diff_markdown.py

Lines changed: 11 additions & 4 deletions
@@ -1,8 +1,15 @@
 from pathlib import Path
+from typing import Any
 
 from .utils import diff_directories
 
 
+def _round_metric(value: Any) -> Any:
+    if isinstance(value, float):
+        return round(value, 1)
+    return value
+
+
 def main(directories: list[Path], changed: str | None = None):
     data_dicts = diff_directories(directories, changed)
 
@@ -29,14 +36,14 @@ def main(directories: list[Path], changed: str | None = None):
             if isinstance(value, int | float):
                 metrics[column] = []
         for metric_name in metrics.keys():
+            first_value = _round_metric(data_dicts[0][question].get(metric_name))
             for ind, data_dict in enumerate(data_dicts):
-                value = data_dict[question].get(metric_name)
-                value_str = str(round(value, 1) if isinstance(value, float) else value)
+                value = _round_metric(data_dict[question].get(metric_name))
                 # Insert arrow emoji based on the difference between metric value and the first data_dict
                 value_emoji = ""
-                if value is not None and ind > 0 and value != data_dicts[0][question][metric_name]:
+                if value is not None and ind > 0 and value != first_value:
                     value_emoji = "⬆️" if value > data_dicts[0][question][metric_name] else "⬇️"
-                metrics[metric_name].append(f"{value_str} {value_emoji}")
+                metrics[metric_name].append(f"{value} {value_emoji}")
         # make a row for each metric
         for metric_name, metric_values in metrics.items():
             markdown_str += (
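
The new _round_metric helper means the arrow comparison is made on the same rounded values that end up in the rendered table, instead of comparing a rounded candidate against the raw baseline. A standalone sketch of the behavior this fixes; the metric values are made up:

```python
def _round_metric(value):
    # Same rounding rule as the helper added above: floats are shown to one decimal place.
    if isinstance(value, float):
        return round(value, 1)
    return value

baseline, candidate = 3.97, 4.02          # made-up metric values from two runs
first_value = _round_metric(baseline)     # 4.0 -> what the baseline cell displays
value = _round_metric(candidate)          # 4.0 -> what the candidate cell displays

# Previously the comparison used the raw baseline (4.02 != 3.97), so an arrow emoji
# appeared even though both cells render as 4.0; comparing rounded values does not.
assert value == first_value
```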

src/evaltools/service_setup.py

Lines changed: 5 additions & 7 deletions
@@ -38,7 +38,6 @@ def get_openai_config() -> dict:
         }
         # azure-ai-evaluate will call DefaultAzureCredential behind the scenes,
         # so we must be logged in to Azure CLI with the correct tenant
-        openai_config["model"] = os.environ["OPENAI_GPT_MODEL"]
     else:
         logger.info("Using OpenAI Service with API Key from OPENAICOM_KEY")
         openai_config: OpenAIModelConfiguration = {
@@ -97,14 +96,13 @@ def get_search_client():
     )
 
 
-def get_openai_client(oai_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]):
+def get_openai_client(
+    oai_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration], azure_credential=None
+):
     if "azure_deployment" in oai_config:
         azure_token_provider = None
-        azure_credential = None
-        if "credential" in oai_config:
-            logger.info("Using Azure OpenAI Service with provided credential")
-            azure_credential = oai_config["credential"]
-        elif not os.environ.get("AZURE_OPENAI_KEY"):
+
+        if azure_credential is None and not os.environ.get("AZURE_OPENAI_KEY"):
             logger.info("Using Azure OpenAI Service with Azure Developer CLI Credential")
             azure_credential = get_azd_credential(os.environ.get("AZURE_OPENAI_TENANT_ID"))
         if azure_credential is not None:
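
On the service_setup side, the credential no longer rides along inside the model configuration: get_openai_client accepts it as an optional second argument, and get_openai_config no longer stores the model name. A rough usage sketch, assuming the module is importable as evaltools.service_setup (per the file path) and that the relevant OPENAI_*/AZURE_OPENAI_* environment variables are set:

```python
from azure.identity import DefaultAzureCredential

from evaltools import service_setup  # import path inferred from src/evaltools/service_setup.py

openai_config = service_setup.get_openai_config()

# The credential is now passed explicitly instead of as a "credential" key in openai_config.
client = service_setup.get_openai_client(openai_config, azure_credential=DefaultAzureCredential())

# With azure_credential=None (the default) and no AZURE_OPENAI_KEY set, the function
# falls back to the Azure Developer CLI credential, as in the diff above.
```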

0 commit comments
