fix readme

justjais · justjais · commit e346349eee0d · 2025-01-31T11:50:35.000+05:30
diff --git a/scripts/evaluation/README.md b/scripts/evaluation/README.md
@@ -11,6 +11,7 @@ Currently we have 2 types of evaluations.
 - QnAs were generated from OCP docs by LLMs. It is possible that some of the questions/answers are not entirely correct. We are constantly trying to verify both Questions & Answers manually. If you find any QnA pair to be modified or removed, please create a PR.
 - OLS API should be ready/live with all the required provider+model configured.
 - It is possible that we want to run both consistency and model evaluation together. To avoid multiple API calls for the same query, *model* evaluation first checks the .csv file generated by *consistency* evaluation. Only if the response is not present in the csv file do we call the API to get the response.
+- Users need to install the Python packages `matplotlib` and `rouge_score` before running the evaluation.
 
 ### e2e test case
 
@@ -21,6 +22,11 @@ These evaluations are also part of **e2e test cases**. Currently *consistency* e
 python -m scripts.evaluation.driver
 ```
 
+### Sample run command
+```
+OPENAI_API_KEY=IGNORED python -m scripts.evaluation.driver --qna_pool_file ./scripts/evaluation/eval_data/aap-sample.parquet --eval_provider_model_id my_rhoai+granite3-8b --eval_metrics answer_relevancy answer_similarity_llm cos_score rougeL_precision --eval_modes vanilla --judge_model granite3-8b --judge_provider my_rhoai3 --eval_query_ids qna1
+```
+
 ### Input Data/QnA pool
 [Json file](eval_data/question_answer_pair.json)
 
diff --git a/scripts/evaluation/utils/constants.py b/scripts/evaluation/utils/constants.py
@@ -11,6 +11,8 @@
     "azure_openai+gpt-4o": ("azure_openai", "gpt-4o"),
     "ollama+llama3.1:latest": ("ollama", "llama3.1:latest"),
     "ollama+mistral": ("ollama", "mistral"),
+    "my_rhoai+granite3-8b": ("my_rhoai", "granite3-8b"),
+    "my_rhoai3+granite3-1-8b": ("my_rhoai3", "granite3-1-8b"),
 }
 
 NON_LLM_EVALS = {

Original file line number	Diff line number	Diff line change
`@@ -11,6 +11,8 @@`
`11`	`11`	`"azure_openai+gpt-4o": ("azure_openai", "gpt-4o"),`
`12`	`12`	`"ollama+llama3.1:latest": ("ollama", "llama3.1:latest"),`
`13`	`13`	`"ollama+mistral": ("ollama", "mistral"),`
	`14`	`+ "my_rhoai+granite3-8b": ("my_rhoai", "granite3-8b"),`
	`15`	`+ "my_rhoai3+granite3-1-8b": ("my_rhoai3", "granite3-1-8b"),`
`14`	`16`	`}`
`15`	`17`
`16`	`18`	`NON_LLM_EVALS = {`