thisisartium · tkersey · Mar 6, 2025 · Mar 6, 2025 · Mar 6, 2025 · Mar 6, 2025
diff --git a/.github/workflows/calculate-error-margin.sh b/.github/workflows/calculate-error-margin.sh
@@ -0,0 +1,81 @@
+#!/bin/bash
+#
+# Prints a GitHub-flavoured Markdown note describing the failure rate of a
+# test run together with its 95% confidence interval (normal approximation).
+#
+# Usage: calculate-error-margin.sh FAILURE_COUNT SAMPLE_SIZE
+#   FAILURE_COUNT  number of failed generations, integer in 0..SAMPLE_SIZE
+#   SAMPLE_SIZE    total number of generations, integer greater than 0
+#
+# Output: the Markdown note on stdout; diagnostics on stderr.
+# Exit status: 0 on success, 2 on invalid arguments, 1 if awk fails.
+
+# z-score for a two-sided 95% confidence level (approximately 1.96).
+readonly Z_SCORE=1.96
+
+#######################################
+# Validates the counts, computes the interval and prints the note.
+# Globals:   Z_SCORE (read)
+# Arguments: $1 - failure count, $2 - sample size
+# Outputs:   Markdown note to stdout, error messages to stderr
+# Returns:   0 on success, 2 on invalid arguments, 1 if awk fails
+#######################################
+calculate_error_margin() {
+  local failures sample_size stats
+  local p_hat se me lower upper lower_count upper_count
+
+  # `read` trims surrounding whitespace, so space-padded counts (some `wc`
+  # implementations pad their output) are still accepted.
+  read -r failures <<<"${1-}"
+  read -r sample_size <<<"${2-}"
+
+  if [[ ! $failures =~ ^[0-9]+$ || ! $sample_size =~ ^[0-9]+$ ]]; then
+    printf 'Usage: %s FAILURE_COUNT SAMPLE_SIZE (non-negative integers)\n' \
+      "${0##*/}" >&2
+    return 2
+  fi
+
+  # Force base 10 so zero-padded input such as "08" is not read as octal.
+  failures=$((10#$failures))
+  sample_size=$((10#$sample_size))
+
+  if ((sample_size == 0)); then
+    printf 'error: SAMPLE_SIZE must be greater than 0\n' >&2
+    return 2
+  fi
+  if ((failures > sample_size)); then
+    printf 'error: FAILURE_COUNT (%d) exceeds SAMPLE_SIZE (%d)\n' \
+      "$failures" "$sample_size" >&2
+    return 2
+  fi
+
+  # One awk call does all floating-point work at full precision; only the
+  # printed values are rounded. Bounds are clamped because the normal
+  # approximation can leave [0, 1] when the proportion is near 0 or 1.
+  stats=$(LC_ALL=C awk -v x="$failures" -v n="$sample_size" -v z="$Z_SCORE" '
+    function ceil(v) { return (v > int(v)) ? int(v) + 1 : int(v) }
+    function clamp(v, lo, hi) { return (v < lo) ? lo : ((v > hi) ? hi : v) }
+    BEGIN {
+      p = x / n
+      se = sqrt(p * (1 - p) / n)
+      me = z * se
+      lo = clamp(p - me, 0, 1)
+      hi = clamp(p + me, 0, 1)
+      # Integer counts inside the interval: round the lower bound up and
+      # the upper bound down.
+      printf "%.4f %.6f %.6f %.6f %.6f %d %d\n",
+        p, se, me, lo, hi, ceil(lo * n), int(hi * n)
+    }') || return 1
+  read -r p_hat se me lower upper lower_count upper_count <<<"$stats"
+
+  printf '%s\n' \
+    '> [!NOTE]' \
+    "> ### There are $failures failures out of $sample_size generations." \
+    "> Sample Proportion (p̂): $p_hat" \
+    "> Standard Error (SE): $se" \
+    "> Margin of Error (ME): $me" \
+    "> 95% Confidence Interval: [$lower, $upper]" \
+    "> 95% Confidence Interval (Count): [$lower_count, $upper_count]"
+}
+
+calculate_error_margin "$@"
diff --git a/.github/workflows/cat-test-examples.yml b/.github/workflows/cat-test-examples.yml
@@ -1,6 +1,8 @@
 name: CAT
 
 on:
+  push:
+    branches: [ ci-experiment/** ]
   workflow_dispatch:
     inputs:
       rounds:
@@ -57,11 +59,10 @@ jobs:
 #            http://localhost:9000/yourbucket/yourfile.zip
 
       - name: Show number of test failures
+        if: always()
         run: |
-          FAILURE_COUNT=$(find examples/team_recommender/tests/test_runs -type f -name "fail-*" | wc -l)
-          if [ "$FAILURE_COUNT" -gt 0 ]; then
-            echo "::warning::There are $FAILURE_COUNT failures"
-          fi
+          FAILURES=$(find examples/team_recommender/tests/test_runs -type f -name "fail-*" | wc -l)
+          .github/workflows/calculate-error-margin.sh "$FAILURES" "$CAT_AI_SAMPLE_SIZE" >> "$GITHUB_STEP_SUMMARY"
 
       - name: Upload artifacts to Google Drive
         if: always()