chore: restore e2e-test.yml for demo

EfeAcar6431 · EfeAcar6431 · commit 23a25f56fa2b · 2026-04-03T16:26:09.000-04:00
Made-with: Cursor
diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml
@@ -0,0 +1,54 @@
+# Manually dispatched end-to-end check: runs the action at the repository
+# root (`uses: ./`) with the fixed inputs below and prints its outputs.
+name: VerifyWise LLM Evaluation
+
+on:
+  workflow_dispatch:
+
+jobs:
+  evaluate:
+    name: VerifyWise LLM Evaluation (gpt-4o-mini)
+    runs-on: ubuntu-latest
+    steps:
+      # `uses: ./` resolves against the workspace, so the repository has to
+      # be checked out first.
+      - uses: actions/checkout@v4
+
+      - name: Run evaluation
+        id: eval
+        uses: ./
+        with:
+          # NOTE(review): hard-coded host looks like a temporary tunnel
+          # address — confirm it is still reachable before dispatching.
+          api_url: https://29ae92ae4bd295.lhr.life
+          project_id: project_20260403_144244_140917
+          dataset_id: "52"
+          metrics: answer_relevancy,bias
+          model_name: gpt-4o-mini
+          model_provider: openai
+          threshold: "0.5"
+          timeout_minutes: "5"
+          poll_interval_seconds: "10"
+          experiment_name: "GH-Actions-E2E-Test"
+          fail_on_threshold: "false"
+          vw_api_token: ${{ secrets.VW_API_TOKEN }}
+          llm_api_key: ${{ secrets.LLM_API_KEY }}
+          # judge_api_key not set — defaults to llm_api_key
+
+      - name: Show results
+        # Run even when the evaluation step failed so its outputs are logged.
+        if: always()
+        # Step outputs reach the shell through env vars instead of being
+        # expanded into the script text, so their contents are never parsed
+        # as shell syntax.
+        env:
+          EVAL_PASSED: ${{ steps.eval.outputs.passed }}
+          EVAL_EXPERIMENT_ID: ${{ steps.eval.outputs.experiment_id }}
+          EVAL_RESULTS_PATH: ${{ steps.eval.outputs.results_path }}
+        run: |
+          echo "Passed: ${EVAL_PASSED}"
+          echo "Experiment: ${EVAL_EXPERIMENT_ID}"
+          if [ -f "${EVAL_RESULTS_PATH}" ]; then
+            echo "--- Results ---"
+            cat "${EVAL_RESULTS_PATH}"
+          fi