(draft/poc) Refactoring to improve llm usability #365
Workflow file for this run
name: CI

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main, develop ]

# Prevent multiple workflow runs for the same PR/branch
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
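  # A new push to the same ref/PR cancels the in-progress run instead of queuing behind it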

# Set minimal required permissions
permissions:
  contents: read
  actions: read

jobs:
  # Unit tests - run across all Python versions
  unit-tests:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.11", "3.12", "3.13"]
      fail-fast: false  # Continue other versions even if one fails
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install uv
        uses: astral-sh/setup-uv@v3
      - name: Restore uv cache
        uses: actions/cache@v4
        with:
          path: ~/.cache/uv
          key: ${{ runner.os }}-uv-${{ matrix.python-version }}-${{ hashFiles('**/pyproject.toml', '**/uv.lock') }}
          restore-keys: |
            ${{ runner.os }}-uv-${{ matrix.python-version }}-
            ${{ runner.os }}-uv-
      - name: Create virtual environment and install dependencies
        run: |
          uv sync --frozen
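          # --frozen installs exactly what uv.lock pins, without re-resolving or updating the lockfile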
      - name: Run unit tests
        run: |
          # Run tests with captured output for parsing
          ./test.sh unit 2>&1 | tee unit_test_output.log
          # Extract test results from pytest output
          echo "Parsing test results..."
          # Get test counts from the output
          TOTAL_TESTS=$(grep -oE "[0-9]+ selected" unit_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
          PASSED_TESTS=$(grep -oE "[0-9]+ passed" unit_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
          FAILED_TESTS=$(grep -oE "[0-9]+ failed" unit_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
          SKIPPED_TESTS=$(grep -oE "[0-9]+ skipped" unit_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
          WARNINGS=$(grep -c "warnings summary" unit_test_output.log | tr -d '\n' || echo "0")
          # Get test duration
          DURATION=$(grep -oE "in [0-9]+\.[0-9]+s" unit_test_output.log | grep -o "[0-9]*\.[0-9]*" | head -1 | tr -d '\n' || echo "0.00")
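          # Note: the parsing above assumes pytest-style output from ./test.sh, i.e. a collection line
          # such as "... 52 selected" and a final summary such as "== 50 passed, 2 skipped in 12.34s ==";
          # WARNINGS only records whether a "warnings summary" section appeared, not individual warnings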
          # Ensure variables are clean (no whitespace, default to 0 if empty)
          TOTAL_TESTS=$(echo "$TOTAL_TESTS" | sed 's/[^0-9]//g')
          PASSED_TESTS=$(echo "$PASSED_TESTS" | sed 's/[^0-9]//g')
          FAILED_TESTS=$(echo "$FAILED_TESTS" | sed 's/[^0-9]//g')
          SKIPPED_TESTS=$(echo "$SKIPPED_TESTS" | sed 's/[^0-9]//g')
          WARNINGS=$(echo "$WARNINGS" | sed 's/[^0-9]//g')
          # Set defaults if empty after cleaning
          TOTAL_TESTS=${TOTAL_TESTS:-0}
          PASSED_TESTS=${PASSED_TESTS:-0}
          FAILED_TESTS=${FAILED_TESTS:-0}
          SKIPPED_TESTS=${SKIPPED_TESTS:-0}
          WARNINGS=${WARNINGS:-0}
          DURATION=${DURATION:-0.00}
          echo "Debug: Parsed values for unit tests:"
          echo "  TOTAL_TESTS='$TOTAL_TESTS'"
          echo "  PASSED_TESTS='$PASSED_TESTS'"
          echo "  FAILED_TESTS='$FAILED_TESTS'"
          echo "  SKIPPED_TESTS='$SKIPPED_TESTS'"
          echo "  WARNINGS='$WARNINGS'"
          echo "  DURATION='$DURATION'"
          # Save results as JSON for later aggregation using jq to ensure valid JSON
          jq -n \
            --arg python_version "${{ matrix.python-version }}" \
            --arg test_type "unit" \
            --arg total "${TOTAL_TESTS:-0}" \
            --arg passed "${PASSED_TESTS:-0}" \
            --arg failed "${FAILED_TESTS:-0}" \
            --arg skipped "${SKIPPED_TESTS:-0}" \
            --arg warnings "${WARNINGS:-0}" \
            --arg duration "${DURATION:-0.00}" \
            '{
              python_version: $python_version,
              test_type: $test_type,
              total: (if $total == "" then 0 else ($total | tonumber) end),
              passed: (if $passed == "" then 0 else ($passed | tonumber) end),
              failed: (if $failed == "" then 0 else ($failed | tonumber) end),
              skipped: (if $skipped == "" then 0 else ($skipped | tonumber) end),
              warnings: (if $warnings == "" then 0 else ($warnings | tonumber) end),
              duration: $duration
            }' > unit_test_results_${{ matrix.python-version }}.json
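          # Resulting JSON (values illustrative):
          #   {"python_version": "3.12", "test_type": "unit", "total": 52,
          #    "passed": 50, "failed": 0, "skipped": 2, "warnings": 1, "duration": "12.34"}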
      - name: Prepare unit test artifacts
        if: always()
        run: |
          # Create artifact directory and copy files that exist
          mkdir -p artifact_upload
          cp unit_test_results_${{ matrix.python-version }}.json artifact_upload/ || true
          cp unit_test_output.log artifact_upload/ || true
          if [ -d ".pytest_cache" ]; then
            cp -r .pytest_cache artifact_upload/
          fi
      - name: Upload unit test results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: unit-test-results-${{ matrix.python-version }}
          path: artifact_upload/
          retention-days: 7

  # Integration tests - run across all Python versions
  integration-tests:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.11", "3.12", "3.13"]
      fail-fast: false  # Continue other versions even if one fails
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install uv
        uses: astral-sh/setup-uv@v3
      - name: Restore uv cache
        uses: actions/cache@v4
        with:
          path: ~/.cache/uv
          key: ${{ runner.os }}-uv-${{ matrix.python-version }}-${{ hashFiles('**/pyproject.toml', '**/uv.lock') }}
          restore-keys: |
            ${{ runner.os }}-uv-${{ matrix.python-version }}-
            ${{ runner.os }}-uv-
      - name: Create virtual environment and install dependencies
        run: |
          uv sync --frozen
      - name: Run integration tests
        run: |
          # Run tests with captured output for parsing
          ./test.sh integration 2>&1 | tee integration_test_output.log
          # Extract test results from pytest output
          echo "Parsing test results..."
          # Get test counts from the output
          TOTAL_TESTS=$(grep -oE "[0-9]+ selected" integration_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
          PASSED_TESTS=$(grep -oE "[0-9]+ passed" integration_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
          FAILED_TESTS=$(grep -oE "[0-9]+ failed" integration_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
          SKIPPED_TESTS=$(grep -oE "[0-9]+ skipped" integration_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
          WARNINGS=$(grep -c "warnings summary" integration_test_output.log | tr -d '\n' || echo "0")
          # Get test duration
          DURATION=$(grep -oE "in [0-9]+\.[0-9]+s" integration_test_output.log | grep -o "[0-9]*\.[0-9]*" | head -1 | tr -d '\n' || echo "0.00")
          # Ensure variables are clean (no whitespace, default to 0 if empty)
          TOTAL_TESTS=$(echo "$TOTAL_TESTS" | sed 's/[^0-9]//g')
          PASSED_TESTS=$(echo "$PASSED_TESTS" | sed 's/[^0-9]//g')
          FAILED_TESTS=$(echo "$FAILED_TESTS" | sed 's/[^0-9]//g')
          SKIPPED_TESTS=$(echo "$SKIPPED_TESTS" | sed 's/[^0-9]//g')
          WARNINGS=$(echo "$WARNINGS" | sed 's/[^0-9]//g')
          # Set defaults if empty after cleaning
          TOTAL_TESTS=${TOTAL_TESTS:-0}
          PASSED_TESTS=${PASSED_TESTS:-0}
          FAILED_TESTS=${FAILED_TESTS:-0}
          SKIPPED_TESTS=${SKIPPED_TESTS:-0}
          WARNINGS=${WARNINGS:-0}
          DURATION=${DURATION:-0.00}
          echo "Debug: Parsed values for integration tests:"
          echo "  TOTAL_TESTS='$TOTAL_TESTS'"
          echo "  PASSED_TESTS='$PASSED_TESTS'"
          echo "  FAILED_TESTS='$FAILED_TESTS'"
          echo "  SKIPPED_TESTS='$SKIPPED_TESTS'"
          echo "  WARNINGS='$WARNINGS'"
          echo "  DURATION='$DURATION'"
          # Save results as JSON for later aggregation using jq to ensure valid JSON
          jq -n \
            --arg python_version "${{ matrix.python-version }}" \
            --arg test_type "integration" \
            --arg total "${TOTAL_TESTS:-0}" \
            --arg passed "${PASSED_TESTS:-0}" \
            --arg failed "${FAILED_TESTS:-0}" \
            --arg skipped "${SKIPPED_TESTS:-0}" \
            --arg warnings "${WARNINGS:-0}" \
            --arg duration "${DURATION:-0.00}" \
            '{
              python_version: $python_version,
              test_type: $test_type,
              total: (if $total == "" then 0 else ($total | tonumber) end),
              passed: (if $passed == "" then 0 else ($passed | tonumber) end),
              failed: (if $failed == "" then 0 else ($failed | tonumber) end),
              skipped: (if $skipped == "" then 0 else ($skipped | tonumber) end),
              warnings: (if $warnings == "" then 0 else ($warnings | tonumber) end),
              duration: $duration
            }' > integration_test_results_${{ matrix.python-version }}.json
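          # Same JSON shape as the unit-test result files, with test_type set to "integration"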
      - name: Prepare integration test artifacts
        if: always()
        run: |
          # Create artifact directory and copy files that exist
          mkdir -p artifact_upload_integration
          cp integration_test_results_${{ matrix.python-version }}.json artifact_upload_integration/ || true
          cp integration_test_output.log artifact_upload_integration/ || true
          if [ -d ".pytest_cache" ]; then
            cp -r .pytest_cache artifact_upload_integration/
          fi
          if [ -d "htmlcov" ]; then
            cp -r htmlcov artifact_upload_integration/
          fi
      - name: Upload integration test results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: integration-test-results-${{ matrix.python-version }}
          path: artifact_upload_integration/
          retention-days: 7

  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Install uv
        uses: astral-sh/setup-uv@v3
      - name: Restore uv cache
        uses: actions/cache@v4
        with:
          path: ~/.cache/uv
          key: ${{ runner.os }}-uv-lint-${{ hashFiles('**/pyproject.toml', '**/uv.lock') }}
          restore-keys: |
            ${{ runner.os }}-uv-lint-
            ${{ runner.os }}-uv-
      - name: Create virtual environment and install dependencies
        run: |
          uv sync --frozen
      - name: Run linting
        run: |
          uv run ruff check src/ tests/ examples/
          uv run ruff format --check src/ tests/ examples/

  security:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Install uv
        uses: astral-sh/setup-uv@v3
      - name: Restore uv cache
        uses: actions/cache@v4
        with:
          path: ~/.cache/uv
          key: ${{ runner.os }}-uv-security-${{ hashFiles('**/pyproject.toml', '**/uv.lock') }}
          restore-keys: |
            ${{ runner.os }}-uv-security-
            ${{ runner.os }}-uv-
      - name: Create virtual environment and install dependencies
        run: |
          uv sync --frozen
      - name: Run security checks
        run: |
          uv run bandit -r src/ -f json -o bandit-report.json || true
          uv run safety check --json --output safety-report.json || true
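          # Both scanners are advisory here: '|| true' keeps the job green even when findings are
          # reported, and the raw JSON reports are uploaded as artifacts in the next step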
      - name: Upload security reports
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: security-reports
          path: |
            bandit-report.json
            safety-report.json
          retention-days: 30

  # Test Results Summary - runs after all tests complete
  test-summary:
    runs-on: ubuntu-latest
    needs: [unit-tests, integration-tests, lint, security]
    if: always()
    steps:
      - name: Download all test results
        uses: actions/download-artifact@v4
        with:
          pattern: "*-test-results-*"
          merge-multiple: true
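      # merge-multiple drops the files from every matching artifact into the same directory,
      # so the per-version JSON result files are all available to the summary step below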
      - name: Generate Test Summary
        if: always()
        run: |
          echo "# 🧪 Test Results Summary" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          # Debug: List available files
          echo "Available test result files:"
          ls -la *test_results*.json || echo "No test result files found"
          # Create test results table
          echo "## Unit Tests" >> $GITHUB_STEP_SUMMARY
          echo "| Python Version | Total Tests | ✅ Passed | ❌ Failed | ⏭️ Skipped | ⚠️ Warnings | ⏱️ Duration |" >> $GITHUB_STEP_SUMMARY
          echo "|----------------|-------------|-----------|-----------|------------|-------------|------------|" >> $GITHUB_STEP_SUMMARY
          # Process unit test results
          UNIT_FILES_FOUND=false
          for file in unit_test_results_*.json; do
            if [ -f "$file" ]; then
              UNIT_FILES_FOUND=true
              echo "Processing unit test file: $file"
              # Validate JSON first
              if ! jq empty "$file" 2>/dev/null; then
                echo "Invalid JSON in $file, skipping..."
                continue
              fi
              VERSION=$(jq -r '.python_version // "unknown"' "$file")
              TOTAL=$(jq -r '.total // 0' "$file")
              PASSED=$(jq -r '.passed // 0' "$file")
              FAILED=$(jq -r '.failed // 0' "$file")
              SKIPPED=$(jq -r '.skipped // 0' "$file")
              WARNINGS=$(jq -r '.warnings // 0' "$file")
              DURATION=$(jq -r '.duration // "0.00"' "$file")
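              # jq's '//' operator substitutes the fallback when a field is missing or null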
              # Add status emoji based on results
              STATUS_ICON="✅"
              if [ "$FAILED" != "0" ] && [ "$FAILED" != "null" ]; then
                STATUS_ICON="❌"
              fi
              echo "| $STATUS_ICON Python $VERSION | $TOTAL | $PASSED | $FAILED | $SKIPPED | $WARNINGS | ${DURATION}s |" >> $GITHUB_STEP_SUMMARY
            fi
          done
          if [ "$UNIT_FILES_FOUND" = false ]; then
            echo "| ⚠️ No unit test results found | - | - | - | - | - | - |" >> $GITHUB_STEP_SUMMARY
          fi
          echo "" >> $GITHUB_STEP_SUMMARY
          # Create integration test results table
          echo "## Integration Tests" >> $GITHUB_STEP_SUMMARY
          echo "| Python Version | Total Tests | ✅ Passed | ❌ Failed | ⏭️ Skipped | ⚠️ Warnings | ⏱️ Duration |" >> $GITHUB_STEP_SUMMARY
          echo "|----------------|-------------|-----------|-----------|------------|-------------|------------|" >> $GITHUB_STEP_SUMMARY
          # Process integration test results
          INTEGRATION_FILES_FOUND=false
          for file in integration_test_results_*.json; do
            if [ -f "$file" ]; then
              INTEGRATION_FILES_FOUND=true
              echo "Processing integration test file: $file"
              # Validate JSON first
              if ! jq empty "$file" 2>/dev/null; then
                echo "Invalid JSON in $file, skipping..."
                continue
              fi
              VERSION=$(jq -r '.python_version // "unknown"' "$file")
              TOTAL=$(jq -r '.total // 0' "$file")
              PASSED=$(jq -r '.passed // 0' "$file")
              FAILED=$(jq -r '.failed // 0' "$file")
              SKIPPED=$(jq -r '.skipped // 0' "$file")
              WARNINGS=$(jq -r '.warnings // 0' "$file")
              DURATION=$(jq -r '.duration // "0.00"' "$file")
              # Add status emoji based on results
              STATUS_ICON="✅"
              if [ "$FAILED" != "0" ] && [ "$FAILED" != "null" ]; then
                STATUS_ICON="❌"
              fi
              echo "| $STATUS_ICON Python $VERSION | $TOTAL | $PASSED | $FAILED | $SKIPPED | $WARNINGS | ${DURATION}s |" >> $GITHUB_STEP_SUMMARY
            fi
          done
          if [ "$INTEGRATION_FILES_FOUND" = false ]; then
            echo "| ⚠️ No integration test results found | - | - | - | - | - | - |" >> $GITHUB_STEP_SUMMARY
          fi
          echo "" >> $GITHUB_STEP_SUMMARY
          # Calculate totals across all versions and test types
          echo "## Overall Summary" >> $GITHUB_STEP_SUMMARY
          TOTAL_TESTS=0
          TOTAL_PASSED=0
          TOTAL_FAILED=0
          TOTAL_SKIPPED=0
          TOTAL_WARNINGS=0
          for file in *_test_results_*.json; do
            if [ -f "$file" ]; then
              # Validate JSON first
              if ! jq empty "$file" 2>/dev/null; then
                echo "Invalid JSON in $file, skipping from totals..."
                continue
              fi
              TESTS=$(jq -r '.total // 0' "$file")
              PASSED=$(jq -r '.passed // 0' "$file")
              FAILED=$(jq -r '.failed // 0' "$file")
              SKIPPED=$(jq -r '.skipped // 0' "$file")
              WARNINGS=$(jq -r '.warnings // 0' "$file")
              # Only add to totals if we got valid numbers
              if [[ "$TESTS" =~ ^[0-9]+$ ]]; then
                TOTAL_TESTS=$((TOTAL_TESTS + TESTS))
              fi
              if [[ "$PASSED" =~ ^[0-9]+$ ]]; then
                TOTAL_PASSED=$((TOTAL_PASSED + PASSED))
              fi
              if [[ "$FAILED" =~ ^[0-9]+$ ]]; then
                TOTAL_FAILED=$((TOTAL_FAILED + FAILED))
              fi
              if [[ "$SKIPPED" =~ ^[0-9]+$ ]]; then
                TOTAL_SKIPPED=$((TOTAL_SKIPPED + SKIPPED))
              fi
              if [[ "$WARNINGS" =~ ^[0-9]+$ ]]; then
                TOTAL_WARNINGS=$((TOTAL_WARNINGS + WARNINGS))
              fi
            fi
          done
          echo "| Metric | Count |" >> $GITHUB_STEP_SUMMARY
          echo "|--------|-------|" >> $GITHUB_STEP_SUMMARY
          echo "| 📊 Total Tests | $TOTAL_TESTS |" >> $GITHUB_STEP_SUMMARY
          echo "| ✅ Passed | $TOTAL_PASSED |" >> $GITHUB_STEP_SUMMARY
          echo "| ❌ Failed | $TOTAL_FAILED |" >> $GITHUB_STEP_SUMMARY
          echo "| ⏭️ Skipped | $TOTAL_SKIPPED |" >> $GITHUB_STEP_SUMMARY
          echo "| ⚠️ Warnings | $TOTAL_WARNINGS |" >> $GITHUB_STEP_SUMMARY
          # Add overall status
          if [ "$TOTAL_FAILED" -eq 0 ]; then
            echo "" >> $GITHUB_STEP_SUMMARY
            echo "## 🎉 All Tests Passed!" >> $GITHUB_STEP_SUMMARY
            echo "All $TOTAL_TESTS tests passed successfully across all Python versions." >> $GITHUB_STEP_SUMMARY
          else
            echo "" >> $GITHUB_STEP_SUMMARY
            echo "## ⚠️ Some Tests Failed" >> $GITHUB_STEP_SUMMARY
            echo "$TOTAL_FAILED out of $TOTAL_TESTS tests failed. Please check the individual job logs for details." >> $GITHUB_STEP_SUMMARY
          fi
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "---" >> $GITHUB_STEP_SUMMARY
          echo "*Generated on $(date)* | *Workflow run: ${{ github.run_id }}*" >> $GITHUB_STEP_SUMMARY