#!/bin/bash
#
# calculate-error-margin.sh
#
# Prints a Markdown note describing the 95% confidence interval (normal
# approximation) for a failure rate observed in a sample of generations.
#
# Usage:
#   calculate-error-margin.sh FAILURE_COUNT SAMPLE_SIZE
#
# Arguments:
#   FAILURE_COUNT  non-negative integer, number of failed generations
#   SAMPLE_SIZE    positive integer, total generations (>= FAILURE_COUNT)
#
# Outputs:
#   stdout  the Markdown note (seven lines); nothing at all on error
#   stderr  diagnostics
#
# Exit status:
#   0 on success, 2 on malformed arguments, 1 on out-of-range values or a
#   failed computation.
#
# Called from .github/workflows/cat-test-examples.yml in the
# "Show number of test failures" step (which runs with `if: always()`):
#   FAILURES=$(find examples/team_recommender/tests/test_runs -type f -name "fail-*" | wc -l)
#   .github/workflows/calculate-error-margin.sh $FAILURES $CAT_AI_SAMPLE_SIZE >> $GITHUB_STEP_SUMMARY
# Because stdout is appended to the step summary, every error path below
# writes to stderr only, so a bad input never produces a half-filled note.

# z-score for a two-sided 95% confidence level.
readonly Z_SCORE=1.96

# Write a diagnostic to stderr.
err() {
  printf 'calculate-error-margin: %s\n' "$*" >&2
}

#######################################
# Check that both arguments describe a usable sample.
# Arguments: $1 - failure count, $2 - sample size
# Returns:   0 if valid, 2 if not non-negative integers, 1 if out of range
#######################################
validate_counts() {
  local failures=${1-} sample=${2-}
  local -r count_re='^[0-9]+$'

  if [[ ! "$failures" =~ $count_re || ! "$sample" =~ $count_re ]]; then
    err "usage: calculate-error-margin.sh FAILURE_COUNT SAMPLE_SIZE (non-negative integers)"
    return 2
  fi
  # 10# forces base 10 so values with leading zeros are not read as octal.
  if ((10#$sample == 0)); then
    err "SAMPLE_SIZE must be greater than 0"
    return 1
  fi
  if ((10#$failures > 10#$sample)); then
    err "FAILURE_COUNT ($failures) cannot exceed SAMPLE_SIZE ($sample)"
    return 1
  fi
}

#######################################
# Compute the interval statistics in a single awk process.
# Arguments: $1 - failure count, $2 - sample size (already validated)
# Outputs:   one line on stdout:
#            p_hat SE ME lower upper lower_count upper_count
#######################################
compute_stats() {
  # LC_ALL=C keeps '.' as the decimal separator regardless of the locale.
  LC_ALL=C awk -v f="$1" -v n="$2" -v z="$Z_SCORE" '
    BEGIN {
      f += 0; n += 0

      # Full double precision is kept until printing; rounding p first
      # would collapse small failure rates to 0 and zero out the interval.
      p  = f / n
      se = sqrt(p * (1 - p) / n)
      me = z * se

      # A proportion cannot leave [0, 1]; the normal approximation can.
      lo = p - me; if (lo < 0) lo = 0
      hi = p + me; if (hi > 1) hi = 1

      # Counts are derived from f +/- n*me (not p*n) so that a zero margin
      # reproduces the observed count exactly.
      lo_cnt = f - n * me; if (lo_cnt < 0) lo_cnt = 0
      hi_cnt = f + n * me; if (hi_cnt > n) hi_cnt = n

      # Round inward: ceiling for the lower count, floor for the upper.
      lo_int = int(lo_cnt); if (lo_int < lo_cnt) lo_int++
      hi_int = int(hi_cnt)

      printf "%.4f %.6f %.6f %.6f %.6f %d %d\n", p, se, me, lo, hi, lo_int, hi_int
    }'
}

#######################################
# Entry point: validate, compute, render.
# Arguments: $1 - failure count, $2 - sample size
# Outputs:   Markdown note on stdout
# Returns:   see "Exit status" in the file header
#######################################
main() {
  local failures=${1-} sample=${2-}
  validate_counts "$failures" "$sample" || return

  local stats
  if ! stats=$(compute_stats "$failures" "$sample"); then
    err "could not compute statistics"
    return 1
  fi

  local p_hat se me lower upper lower_count upper_count
  read -r p_hat se me lower upper lower_count upper_count <<<"$stats"

  printf '> [!NOTE]\n'
  printf '> ### There are %s failures out of %s generations. \n' "$failures" "$sample"
  printf '> Sample Proportion (p̂): %s\n' "$p_hat"
  printf '> Standard Error (SE): %s\n' "$se"
  printf '> Margin of Error (ME): %s\n' "$me"
  printf '> 95%% Confidence Interval: [%s, %s]\n' "$lower" "$upper"
  printf '> 95%% Confidence Interval (Count): [%s, %s]\n' "$lower_count" "$upper_count"
}

# Must stay the last command: its return status is the script's exit status.
main "$@"