add llm_tests target and CI

luarss · luarss · commit 06cfc0319913 · 2024-11-09T14:31:26.000Z
Signed-off-by: Jack Luar <jluar@precisioninno.com>
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -28,6 +28,16 @@ jobs:
     - name: Build Docker image
       run: |
         make docker
+    - name: Run LLM CI
+      working-directory: evaluation
+      run: |
+        make llm-tests
+    - name: Create commit comment
+      # `working-directory` only applies to `run` steps, so the report path below is given from the repo root.
+      uses: peter-evans/commit-comment@v3
+      with:
+        token: ${{ secrets.GH_PATH }}
+        body-path: evaluation/llm_tests_output.txt
     - name: Teardown
       if: always()
       run: |
diff --git a/Makefile b/Makefile
@@ -1,4 +1,6 @@
-FOLDERS=backend frontend
+.PHONY: init init-dev format check  # command targets, not files
+
+FOLDERS := backend frontend evaluation
 
 init:
 	@for folder in $(FOLDERS); do (cd $$folder && make init && cd ../); done
diff --git a/evaluation/Makefile b/evaluation/Makefile
@@ -1,3 +1,5 @@
+.PHONY: init init-dev format check clean llm-tests  # command targets, not files
+
 init:
 	@python3 -m venv .venv && \
 		. .venv/bin/activate && \
@@ -16,3 +18,10 @@ format:
 check:
 	@. .venv/bin/activate && \
 		ruff check --fix
+
+clean:  ## Remove the report file written by the llm-tests target
+	@rm -f llm_tests_output.txt
+
+llm-tests: clean  ## Run auto_evaluation/llm_tests.sh inside .venv; stdout and stderr go to llm_tests_output.txt
+	@. .venv/bin/activate && \
+		./auto_evaluation/llm_tests.sh > llm_tests_output.txt 2>&1
diff --git a/evaluation/auto_evaluation/llm_tests.sh b/evaluation/auto_evaluation/llm_tests.sh
@@ -0,0 +1,23 @@
+#!/bin/bash -eu
+#
+# Run auto_evaluation/eval_main.py once per retriever against the EDA Corpus
+# dataset. All paths are relative to the evaluation/ directory, so invoke the
+# script from there. Because of -e, the first failing run aborts the script.
+
+retrievers=(
+    "agent-retriever"
+    "ensemble"
+)
+
+echo "==================================="
+echo "==> Dataset: EDA Corpus"
+for retriever in "${retrievers[@]}"; do
+    echo "==> Running tests for $retriever"
+    # Quote the expansion so the retriever name is always passed as one argument.
+    python auto_evaluation/eval_main.py \
+       --base_url http://localhost:8000 \
+       --dataset ./auto_evaluation/dataset/EDA_Corpus_100_Question.csv \
+       --retriever "$retriever"
+    echo "==> Done"
+done
+echo "==================================="
diff --git a/evaluation/llm_tests_output.txt b/evaluation/llm_tests_output.txt
@@ -0,0 +1,16 @@
+===================================
+==> Dataset: EDA Corpus
+==> Running tests for agent-retriever
+/home/luars/ORAssistant/evaluation/.venv/lib/python3.12/site-packages/deepeval/__init__.py:49: UserWarning: You are using deepeval version 1.4.9, however version 1.5.0 is available. You should consider upgrading via the "pip install --upgrade deepeval" command.
+  warnings.warn(
+Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]Fetching 3 files: 100%|██████████| 3/3 [00:00<00:00, 33.41it/s]
+Traceback (most recent call last):
+  File "/home/luars/ORAssistant/evaluation/auto_evaluation/eval_main.py", line 146, in <module>
+    harness = EvaluationHarness(args.base_url, args.dataset, args.reranker_base_url)
+              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/luars/ORAssistant/evaluation/auto_evaluation/eval_main.py", line 44, in __init__
+    self.qns = preprocess.read_data(self.dataset)
+               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/luars/ORAssistant/evaluation/auto_evaluation/dataset/preprocess.py", line 10, in read_data
+    assert len(header) == 2, "CSV file must have exactly 2 columns"
+AssertionError: CSV file must have exactly 2 columns