(draft/poc) Refactoring to improve llm usability #365

Workflow file for this run

name: CI
on:
push:
branches: [ main, develop ]
pull_request:
branches: [ main, develop ]
# Prevent multiple workflow runs for the same PR/branch
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
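# For a pull_request run on PR #365 this group resolves to "CI-refs/pull/365/merge",
# so a newer push to the same PR cancels the older in-progress run.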
# Set minimal required permissions
permissions:
contents: read
actions: read
jobs:
# Unit tests - run across all Python versions
unit-tests:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.11", "3.12", "3.13"]
fail-fast: false # Continue other versions even if one fails
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install uv
uses: astral-sh/setup-uv@v3
- name: Restore uv cache
uses: actions/cache@v4
with:
path: ~/.cache/uv
key: ${{ runner.os }}-uv-${{ matrix.python-version }}-${{ hashFiles('**/pyproject.toml', '**/uv.lock') }}
restore-keys: |
${{ runner.os }}-uv-${{ matrix.python-version }}-
${{ runner.os }}-uv-
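# The resolved key looks roughly like "Linux-uv-3.12-<hash of pyproject.toml/uv.lock>";
# on a miss, the restore-keys above fall back to progressively less specific prefixes.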
- name: Create virtual environment and install dependencies
run: |
uv sync --frozen
- name: Run unit tests
run: |
# Run tests and capture output for parsing; remember the test exit code,
# since piping into tee would otherwise hide a failure from the job status
./test.sh unit 2>&1 | tee unit_test_output.log
TEST_EXIT_CODE=${PIPESTATUS[0]}
# Extract test results from pytest output
echo "Parsing test results..."
# Get test counts from the output with better parsing
TOTAL_TESTS=$(grep -oE "[0-9]+ selected" unit_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
PASSED_TESTS=$(grep -oE "[0-9]+ passed" unit_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
FAILED_TESTS=$(grep -oE "[0-9]+ failed" unit_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
SKIPPED_TESTS=$(grep -oE "[0-9]+ skipped" unit_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
WARNINGS=$(grep -oE "[0-9]+ warnings?" unit_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
# Get test duration
DURATION=$(grep -oE "in [0-9]+\.[0-9]+s" unit_test_output.log | grep -o "[0-9]*\.[0-9]*" | head -1 | tr -d '\n' || echo "0.00")
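# The greps above target pytest's collection line (e.g. "... 120 selected") and its final
# summary line (e.g. "118 passed, 1 skipped, 2 warnings in 3.45s"); values shown are illustrative.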
# Ensure variables are clean (no whitespace, default to 0 if empty)
TOTAL_TESTS=$(echo "$TOTAL_TESTS" | sed 's/[^0-9]//g')
PASSED_TESTS=$(echo "$PASSED_TESTS" | sed 's/[^0-9]//g')
FAILED_TESTS=$(echo "$FAILED_TESTS" | sed 's/[^0-9]//g')
SKIPPED_TESTS=$(echo "$SKIPPED_TESTS" | sed 's/[^0-9]//g')
WARNINGS=$(echo "$WARNINGS" | sed 's/[^0-9]//g')
# Set defaults if empty after cleaning
TOTAL_TESTS=${TOTAL_TESTS:-0}
PASSED_TESTS=${PASSED_TESTS:-0}
FAILED_TESTS=${FAILED_TESTS:-0}
SKIPPED_TESTS=${SKIPPED_TESTS:-0}
WARNINGS=${WARNINGS:-0}
DURATION=${DURATION:-0.00}
echo "Debug: Parsed values for unit tests:"
echo " TOTAL_TESTS='$TOTAL_TESTS'"
echo " PASSED_TESTS='$PASSED_TESTS'"
echo " FAILED_TESTS='$FAILED_TESTS'"
echo " SKIPPED_TESTS='$SKIPPED_TESTS'"
echo " WARNINGS='$WARNINGS'"
echo " DURATION='$DURATION'"
# Save results as JSON for later aggregation using jq to ensure valid JSON
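# The file written below looks like this (illustrative values):
#   {"python_version":"3.12","test_type":"unit","total":120,"passed":118,"failed":1,"skipped":1,"warnings":2,"duration":"3.45"}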
jq -n \
--arg python_version "${{ matrix.python-version }}" \
--arg test_type "unit" \
--arg total "${TOTAL_TESTS:-0}" \
--arg passed "${PASSED_TESTS:-0}" \
--arg failed "${FAILED_TESTS:-0}" \
--arg skipped "${SKIPPED_TESTS:-0}" \
--arg warnings "${WARNINGS:-0}" \
--arg duration "${DURATION:-0.00}" \
'{
python_version: $python_version,
test_type: $test_type,
total: (if $total == "" then 0 else ($total | tonumber) end),
passed: (if $passed == "" then 0 else ($passed | tonumber) end),
failed: (if $failed == "" then 0 else ($failed | tonumber) end),
skipped: (if $skipped == "" then 0 else ($skipped | tonumber) end),
warnings: (if $warnings == "" then 0 else ($warnings | tonumber) end),
duration: $duration
}' > unit_test_results_${{ matrix.python-version }}.json
# Propagate the real test exit code so failing tests fail this job
exit "$TEST_EXIT_CODE"
- name: Prepare unit test artifacts
if: always()
run: |
# Create artifact directory and copy files that exist
mkdir -p artifact_upload
cp unit_test_results_${{ matrix.python-version }}.json artifact_upload/ || true
cp unit_test_output.log artifact_upload/ || true
if [ -d ".pytest_cache" ]; then
cp -r .pytest_cache artifact_upload/
fi
- name: Upload unit test results
uses: actions/upload-artifact@v4
if: always()
with:
name: unit-test-results-${{ matrix.python-version }}
path: artifact_upload/
retention-days: 7
# Integration tests - run across all Python versions
integration-tests:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.11", "3.12", "3.13"]
fail-fast: false # Continue other versions even if one fails
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install uv
uses: astral-sh/setup-uv@v3
- name: Restore uv cache
uses: actions/cache@v4
with:
path: ~/.cache/uv
key: ${{ runner.os }}-uv-${{ matrix.python-version }}-${{ hashFiles('**/pyproject.toml', '**/uv.lock') }}
restore-keys: |
${{ runner.os }}-uv-${{ matrix.python-version }}-
${{ runner.os }}-uv-
- name: Create virtual environment and install dependencies
run: |
uv sync --frozen
- name: Run integration tests
run: |
# Run tests and capture output for parsing; remember the test exit code,
# since piping into tee would otherwise hide a failure from the job status
./test.sh integration 2>&1 | tee integration_test_output.log
TEST_EXIT_CODE=${PIPESTATUS[0]}
# Extract test results from pytest output
echo "Parsing test results..."
# Get test counts from the output with better parsing
TOTAL_TESTS=$(grep -oE "[0-9]+ selected" integration_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
PASSED_TESTS=$(grep -oE "[0-9]+ passed" integration_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
FAILED_TESTS=$(grep -oE "[0-9]+ failed" integration_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
SKIPPED_TESTS=$(grep -oE "[0-9]+ skipped" integration_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
WARNINGS=$(grep -oE "[0-9]+ warnings?" integration_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
# Get test duration
DURATION=$(grep -oE "in [0-9]+\.[0-9]+s" integration_test_output.log | grep -o "[0-9]*\.[0-9]*" | head -1 | tr -d '\n' || echo "0.00")
# Ensure variables are clean (no whitespace, default to 0 if empty)
TOTAL_TESTS=$(echo "$TOTAL_TESTS" | sed 's/[^0-9]//g')
PASSED_TESTS=$(echo "$PASSED_TESTS" | sed 's/[^0-9]//g')
FAILED_TESTS=$(echo "$FAILED_TESTS" | sed 's/[^0-9]//g')
SKIPPED_TESTS=$(echo "$SKIPPED_TESTS" | sed 's/[^0-9]//g')
WARNINGS=$(echo "$WARNINGS" | sed 's/[^0-9]//g')
# Set defaults if empty after cleaning
TOTAL_TESTS=${TOTAL_TESTS:-0}
PASSED_TESTS=${PASSED_TESTS:-0}
FAILED_TESTS=${FAILED_TESTS:-0}
SKIPPED_TESTS=${SKIPPED_TESTS:-0}
WARNINGS=${WARNINGS:-0}
DURATION=${DURATION:-0.00}
echo "Debug: Parsed values for integration tests:"
echo " TOTAL_TESTS='$TOTAL_TESTS'"
echo " PASSED_TESTS='$PASSED_TESTS'"
echo " FAILED_TESTS='$FAILED_TESTS'"
echo " SKIPPED_TESTS='$SKIPPED_TESTS'"
echo " WARNINGS='$WARNINGS'"
echo " DURATION='$DURATION'"
# Save results as JSON for later aggregation using jq to ensure valid JSON
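# Same JSON shape as the unit job, with test_type set to "integration".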
jq -n \
--arg python_version "${{ matrix.python-version }}" \
--arg test_type "integration" \
--arg total "${TOTAL_TESTS:-0}" \
--arg passed "${PASSED_TESTS:-0}" \
--arg failed "${FAILED_TESTS:-0}" \
--arg skipped "${SKIPPED_TESTS:-0}" \
--arg warnings "${WARNINGS:-0}" \
--arg duration "${DURATION:-0.00}" \
'{
python_version: $python_version,
test_type: $test_type,
total: (if $total == "" then 0 else ($total | tonumber) end),
passed: (if $passed == "" then 0 else ($passed | tonumber) end),
failed: (if $failed == "" then 0 else ($failed | tonumber) end),
skipped: (if $skipped == "" then 0 else ($skipped | tonumber) end),
warnings: (if $warnings == "" then 0 else ($warnings | tonumber) end),
duration: $duration
}' > integration_test_results_${{ matrix.python-version }}.json
# Propagate the real test exit code so failing tests fail this job
exit "$TEST_EXIT_CODE"
- name: Prepare integration test artifacts
if: always()
run: |
# Create artifact directory and copy files that exist
mkdir -p artifact_upload_integration
cp integration_test_results_${{ matrix.python-version }}.json artifact_upload_integration/ || true
cp integration_test_output.log artifact_upload_integration/ || true
if [ -d ".pytest_cache" ]; then
cp -r .pytest_cache artifact_upload_integration/
fi
if [ -d "htmlcov" ]; then
cp -r htmlcov artifact_upload_integration/
fi
- name: Upload integration test results
uses: actions/upload-artifact@v4
if: always()
with:
name: integration-test-results-${{ matrix.python-version }}
path: artifact_upload_integration/
retention-days: 7
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.12
uses: actions/setup-python@v5
with:
python-version: "3.12"
- name: Install uv
uses: astral-sh/setup-uv@v3
- name: Restore uv cache
uses: actions/cache@v4
with:
path: ~/.cache/uv
key: ${{ runner.os }}-uv-lint-${{ hashFiles('**/pyproject.toml', '**/uv.lock') }}
restore-keys: |
${{ runner.os }}-uv-lint-
${{ runner.os }}-uv-
- name: Create virtual environment and install dependencies
run: |
uv sync --frozen
- name: Run linting
run: |
uv run ruff check src/ tests/ examples/
uv run ruff format --check src/ tests/ examples/
security:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.12
uses: actions/setup-python@v5
with:
python-version: "3.12"
- name: Install uv
uses: astral-sh/setup-uv@v3
- name: Restore uv cache
uses: actions/cache@v4
with:
path: ~/.cache/uv
key: ${{ runner.os }}-uv-security-${{ hashFiles('**/pyproject.toml', '**/uv.lock') }}
restore-keys: |
${{ runner.os }}-uv-security-
${{ runner.os }}-uv-
- name: Create virtual environment and install dependencies
run: |
uv sync --frozen
- name: Run security checks
run: |
uv run bandit -r src/ -f json -o bandit-report.json || true
uv run safety check --json --output safety-report.json || true
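# "|| true" keeps this job green even when bandit/safety report findings;
# results are surfaced through the uploaded reports below rather than by failing CI.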
- name: Upload security reports
uses: actions/upload-artifact@v4
if: always()
with:
name: security-reports
path: |
bandit-report.json
safety-report.json
retention-days: 30
# Test Results Summary - runs after all tests complete
test-summary:
runs-on: ubuntu-latest
needs: [unit-tests, integration-tests, lint, security]
if: always()
steps:
- name: Download all test results
uses: actions/download-artifact@v4
with:
pattern: "*-test-results-*"
merge-multiple: true
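# merge-multiple places the files from every matching artifact into one directory
# (the workspace by default), so the per-version JSON files end up side by side.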
- name: Generate Test Summary
if: always()
run: |
echo "# 🧪 Test Results Summary" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
# Debug: List available files
echo "Available test result files:"
ls -la *test_results*.json || echo "No test result files found"
# Create test results table
echo "## Unit Tests" >> $GITHUB_STEP_SUMMARY
echo "| Python Version | Total Tests | ✅ Passed | ❌ Failed | ⏭️ Skipped | ⚠️ Warnings | ⏱️ Duration |" >> $GITHUB_STEP_SUMMARY
echo "|----------------|-------------|-----------|-----------|------------|-------------|------------|" >> $GITHUB_STEP_SUMMARY
# Process unit test results
UNIT_FILES_FOUND=false
for file in unit_test_results_*.json; do
if [ -f "$file" ]; then
UNIT_FILES_FOUND=true
echo "Processing unit test file: $file"
# Validate JSON first
if ! jq empty "$file" 2>/dev/null; then
echo "Invalid JSON in $file, skipping..."
continue
fi
VERSION=$(jq -r '.python_version // "unknown"' "$file")
TOTAL=$(jq -r '.total // 0' "$file")
PASSED=$(jq -r '.passed // 0' "$file")
FAILED=$(jq -r '.failed // 0' "$file")
SKIPPED=$(jq -r '.skipped // 0' "$file")
WARNINGS=$(jq -r '.warnings // 0' "$file")
DURATION=$(jq -r '.duration // "0.00"' "$file")
# Add status emoji based on results
STATUS_ICON="✅"
if [ "$FAILED" != "0" ] && [ "$FAILED" != "null" ]; then
STATUS_ICON="❌"
fi
echo "| $STATUS_ICON Python $VERSION | $TOTAL | $PASSED | $FAILED | $SKIPPED | $WARNINGS | ${DURATION}s |" >> $GITHUB_STEP_SUMMARY
fi
done
if [ "$UNIT_FILES_FOUND" = false ]; then
echo "| ⚠️ No unit test results found | - | - | - | - | - | - |" >> $GITHUB_STEP_SUMMARY
fi
echo "" >> $GITHUB_STEP_SUMMARY
# Create integration test results table
echo "## Integration Tests" >> $GITHUB_STEP_SUMMARY
echo "| Python Version | Total Tests | ✅ Passed | ❌ Failed | ⏭️ Skipped | ⚠️ Warnings | ⏱️ Duration |" >> $GITHUB_STEP_SUMMARY
echo "|----------------|-------------|-----------|-----------|------------|-------------|------------|" >> $GITHUB_STEP_SUMMARY
# Process integration test results
INTEGRATION_FILES_FOUND=false
for file in integration_test_results_*.json; do
if [ -f "$file" ]; then
INTEGRATION_FILES_FOUND=true
echo "Processing integration test file: $file"
# Validate JSON first
if ! jq empty "$file" 2>/dev/null; then
echo "Invalid JSON in $file, skipping..."
continue
fi
VERSION=$(jq -r '.python_version // "unknown"' "$file")
TOTAL=$(jq -r '.total // 0' "$file")
PASSED=$(jq -r '.passed // 0' "$file")
FAILED=$(jq -r '.failed // 0' "$file")
SKIPPED=$(jq -r '.skipped // 0' "$file")
WARNINGS=$(jq -r '.warnings // 0' "$file")
DURATION=$(jq -r '.duration // "0.00"' "$file")
# Add status emoji based on results
STATUS_ICON="✅"
if [ "$FAILED" != "0" ] && [ "$FAILED" != "null" ]; then
STATUS_ICON="❌"
fi
echo "| $STATUS_ICON Python $VERSION | $TOTAL | $PASSED | $FAILED | $SKIPPED | $WARNINGS | ${DURATION}s |" >> $GITHUB_STEP_SUMMARY
fi
done
if [ "$INTEGRATION_FILES_FOUND" = false ]; then
echo "| ⚠️ No integration test results found | - | - | - | - | - | - |" >> $GITHUB_STEP_SUMMARY
fi
echo "" >> $GITHUB_STEP_SUMMARY
# Calculate totals across all versions and test types
echo "## Overall Summary" >> $GITHUB_STEP_SUMMARY
TOTAL_TESTS=0
TOTAL_PASSED=0
TOTAL_FAILED=0
TOTAL_SKIPPED=0
TOTAL_WARNINGS=0
for file in *_test_results_*.json; do
if [ -f "$file" ]; then
# Validate JSON first
if ! jq empty "$file" 2>/dev/null; then
echo "Invalid JSON in $file, skipping from totals..."
continue
fi
TESTS=$(jq -r '.total // 0' "$file")
PASSED=$(jq -r '.passed // 0' "$file")
FAILED=$(jq -r '.failed // 0' "$file")
SKIPPED=$(jq -r '.skipped // 0' "$file")
WARNINGS=$(jq -r '.warnings // 0' "$file")
# Only add to totals if we got valid numbers
if [[ "$TESTS" =~ ^[0-9]+$ ]]; then
TOTAL_TESTS=$((TOTAL_TESTS + TESTS))
fi
if [[ "$PASSED" =~ ^[0-9]+$ ]]; then
TOTAL_PASSED=$((TOTAL_PASSED + PASSED))
fi
if [[ "$FAILED" =~ ^[0-9]+$ ]]; then
TOTAL_FAILED=$((TOTAL_FAILED + FAILED))
fi
if [[ "$SKIPPED" =~ ^[0-9]+$ ]]; then
TOTAL_SKIPPED=$((TOTAL_SKIPPED + SKIPPED))
fi
if [[ "$WARNINGS" =~ ^[0-9]+$ ]]; then
TOTAL_WARNINGS=$((TOTAL_WARNINGS + WARNINGS))
fi
fi
done
echo "| Metric | Count |" >> $GITHUB_STEP_SUMMARY
echo "|--------|-------|" >> $GITHUB_STEP_SUMMARY
echo "| 📊 Total Tests | $TOTAL_TESTS |" >> $GITHUB_STEP_SUMMARY
echo "| ✅ Passed | $TOTAL_PASSED |" >> $GITHUB_STEP_SUMMARY
echo "| ❌ Failed | $TOTAL_FAILED |" >> $GITHUB_STEP_SUMMARY
echo "| ⏭️ Skipped | $TOTAL_SKIPPED |" >> $GITHUB_STEP_SUMMARY
echo "| ⚠️ Warnings | $TOTAL_WARNINGS |" >> $GITHUB_STEP_SUMMARY
# Add overall status
if [ "$TOTAL_FAILED" -eq 0 ]; then
echo "" >> $GITHUB_STEP_SUMMARY
echo "## 🎉 All Tests Passed!" >> $GITHUB_STEP_SUMMARY
echo "All $TOTAL_TESTS tests passed successfully across all Python versions." >> $GITHUB_STEP_SUMMARY
else
echo "" >> $GITHUB_STEP_SUMMARY
echo "## ⚠️ Some Tests Failed" >> $GITHUB_STEP_SUMMARY
echo "$TOTAL_FAILED out of $TOTAL_TESTS tests failed. Please check the individual job logs for details." >> $GITHUB_STEP_SUMMARY
fi
echo "" >> $GITHUB_STEP_SUMMARY
echo "---" >> $GITHUB_STEP_SUMMARY
echo "*Generated on $(date)* | *Workflow run: ${{ github.run_id }}*" >> $GITHUB_STEP_SUMMARY