Skip to content

Commit 9531957

Browse files
authored
Fix CI tests (#130)
- Instead of custom DeepEval wrappers, use DeepEval's first-class `GeminiModel` support. - Note: we cannot use `api_key` because of limitations of free-tier accounts. - Bump versions - Fixes #131 - Fixes #129 --------- Signed-off-by: Jack Luar <[email protected]>
1 parent 60f68a9 commit 9531957

File tree

9 files changed

+39
-11
lines changed

9 files changed

+39
-11
lines changed

.github/workflows/ci-secret.yaml

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,10 +35,16 @@ jobs:
3535
- name: Populate environment variables
3636
run: |
3737
cp backend/.env.example backend/.env
38+
cp backend/.env evaluation/.env
39+
3840
sed -i 's|{{GOOGLE_API_KEY}}|${{ secrets.GOOGLE_API_KEY }}|g' backend/.env
41+
sed -i 's|{{GOOGLE_PROJECT_ID}}|${{ secrets.GOOGLE_PROJECT_ID }}|g' backend/.env
3942
sed -i 's|{{PATH_TO_GOOGLE_APPLICATION_CREDENTIALS}}|src/secret.json|g' backend/.env
40-
cp backend/.env evaluation/.env
41-
cp backend/.env frontend/.env
43+
44+
sed -i 's|{{GOOGLE_API_KEY}}|${{ secrets.GOOGLE_API_KEY }}|g' evaluation/.env
45+
sed -i 's|{{GOOGLE_PROJECT_ID}}|${{ secrets.GOOGLE_PROJECT_ID }}|g' evaluation/.env
46+
sed -i 's|{{PATH_TO_GOOGLE_APPLICATION_CREDENTIALS}}|auto_evaluation/src/secret.json|g' evaluation/.env
47+
4248
cp ${{ secrets.PATH_TO_GOOGLE_APPLICATION_CREDENTIALS }} backend/src
4349
cp ${{ secrets.PATH_TO_GOOGLE_APPLICATION_CREDENTIALS }} evaluation/auto_evaluation/src
4450
- name: Build Docker image

Makefile

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
FOLDERS=backend frontend evaluation
2+
GOOGLE_SECRET_JSON:=$(HOME)/secret.json
23

34
.PHONY: init
45
init:
@@ -28,6 +29,12 @@ docker-up:
2829
docker-down:
2930
@docker compose down --remove-orphans
3031

32+
# --- Development Commands ---
33+
.PHONY: seed-credentials
34+
seed-credentials:
35+
@cp $(GOOGLE_SECRET_JSON) backend/src
36+
@cp $(GOOGLE_SECRET_JSON) evaluation/auto_evaluation/src
37+
3138
.PHONY: changelog
3239
changelog:
3340
@git log --pretty=format:"%h%x09%an%x09%ad%x09%s" --date=short --since="2024-06-01" > CHANGELOG.md

backend/.dockerignore

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,7 @@
1+
.venv
2+
*.egg-info
3+
.mypy-cache
4+
__pycache__
5+
faiss_db
6+
data
7+
tests

backend/.env.example

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
GOOGLE_API_KEY={{GOOGLE_API_KEY}}
2+
GOOGLE_PROJECT_ID={{GOOGLE_PROJECT_ID}}
23
GOOGLE_APPLICATION_CREDENTIALS={{PATH_TO_GOOGLE_APPLICATION_CREDENTIALS}}
34

45
USE_CUDA=false

evaluation/auto_evaluation/eval_main.py

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,8 @@
1111
from dotenv import load_dotenv
1212
from deepeval.test_case import LLMTestCase
1313
from deepeval import evaluate
14+
from deepeval.models import GeminiModel
1415

15-
from auto_evaluation.src.models.vertex_ai import GoogleVertexAILangChain
1616
from auto_evaluation.src.metrics.retrieval import (
1717
make_contextual_precision_metric,
1818
make_contextual_recall_metric,
@@ -42,7 +42,11 @@ def __init__(self, base_url: str, dataset: str, reranker_base_url: str = ""):
4242
self.dataset = dataset
4343
self.reranker_base_url = reranker_base_url
4444
self.qns = preprocess.read_data(self.dataset)
45-
self.eval_model = GoogleVertexAILangChain(model_name="gemini-1.5-pro-002")
45+
self.eval_model = GeminiModel(
46+
model_name="gemini-1.5-pro-002",
47+
project=os.getenv("GOOGLE_PROJECT_ID", ""),
48+
location=os.getenv("GOOGLE_CLOUD_LOCATION", "us-central1"),
49+
)
4650
self.log_dir = "logs"
4751
os.makedirs(self.log_dir, exist_ok=True)
4852
self.sanity_check()
@@ -91,8 +95,8 @@ def evaluate(self, retriever: str):
9195

9296
# parallel evaluate
9397
evaluate(
94-
retrieval_tcs,
95-
[precision, recall, hallucination],
98+
test_cases=retrieval_tcs,
99+
metrics=[precision, recall, hallucination],
96100
print_results=False,
97101
)
98102

evaluation/auto_evaluation/src/metrics/retrieval.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ def make_faithfulness_metric(model: DeepEvalBaseLLM) -> FaithfulnessMetric:
4646
)
4747

4848

49-
def make_hallucination_metric(model: DeepEvalBaseLLM) -> FaithfulnessMetric:
49+
def make_hallucination_metric(model: DeepEvalBaseLLM) -> HallucinationMetric:
5050
return HallucinationMetric(
5151
threshold=HALLUCINATION_THRESHOLD,
5252
model=model,

evaluation/auto_evaluation/src/models/vertex_ai.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,7 @@ async def a_generate(self, prompt: str, schema: Any) -> Any:
6060
client=self.load_model(),
6161
mode=instructor.Mode.VERTEXAI_TOOLS,
6262
)
63-
resp = await instructor_client.completions.create(
63+
resp = await instructor_client.messages.create(
6464
messages=[
6565
{
6666
"role": "user",

evaluation/requirements.txt

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -8,13 +8,12 @@ requests==2.32.3
88
requests-oauthlib==2.0.0
99
streamlit==1.37.0
1010
gspread==6.1.2
11-
deepeval==1.4.9
12-
langchain-google-vertexai==2.0.6
11+
deepeval==2.6.8
12+
langchain-google-vertexai==2.0.15
1313
asyncio==3.4.3
1414
huggingface-hub==0.26.2
1515
instructor[vertexai]==1.5.2
1616
openai==1.58.1
1717
pydantic==2.10.4
1818
tqdm==4.67.1
19-
vertexai==1.71.1
2019
plotly==5.24.1

frontend/.dockerignore

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,4 @@
1+
.venv
2+
*.egg-info
3+
.mypy-cache
4+
__pycache__

0 commit comments

Comments
 (0)