(draft/poc) Refactoring to improve llm usability #365

Workflow file for this run

name: CI
on:
push:
branches: [ main, develop ]
pull_request:
branches: [ main, develop ]
# Prevent multiple workflow runs for the same PR/branch
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
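# For a pull_request run on PR #365 this group resolves to "CI-refs/pull/365/merge",
# so a newer push to the same PR cancels the older in-progress run.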
# Set minimal required permissions
permissions:
contents: read
actions: read
jobs:
# Unit tests - run across all Python versions
unit-tests:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.11", "3.12", "3.13"]
fail-fast: false # Continue other versions even if one fails
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install uv
uses: astral-sh/setup-uv@v3
- name: Restore uv cache
uses: actions/cache@v4
with:
path: ~/.cache/uv
key: ${{ runner.os }}-uv-${{ matrix.python-version }}-${{ hashFiles('**/pyproject.toml', '**/uv.lock') }}
restore-keys: |
${{ runner.os }}-uv-${{ matrix.python-version }}-
${{ runner.os }}-uv-
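# The resolved key looks roughly like "Linux-uv-3.12-<hash of pyproject.toml/uv.lock>";
# on a miss, the restore-keys above fall back to progressively less specific prefixes.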
- name: Create virtual environment and install dependencies
run: |
uv sync --frozen
- name: Run unit tests
run: |
# Run tests and capture output for parsing; remember the test exit code,
# since piping into tee would otherwise hide a failure from the job status
./test.sh unit 2>&1 | tee unit_test_output.log
TEST_EXIT_CODE=${PIPESTATUS[0]}
# Extract test results from pytest output
echo "Parsing test results..."
# Get test counts from the output with better parsing
TOTAL_TESTS=$(grep -oE "[0-9]+ selected" unit_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
PASSED_TESTS=$(grep -oE "[0-9]+ passed" unit_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
FAILED_TESTS=$(grep -oE "[0-9]+ failed" unit_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
SKIPPED_TESTS=$(grep -oE "[0-9]+ skipped" unit_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
WARNINGS=$(grep -oE "[0-9]+ warnings?" unit_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
# Get test duration
DURATION=$(grep -oE "in [0-9]+\.[0-9]+s" unit_test_output.log | grep -o "[0-9]*\.[0-9]*" | head -1 | tr -d '\n' || echo "0.00")
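# The greps above target pytest's collection line (e.g. "... 120 selected") and its final
# summary line (e.g. "118 passed, 1 skipped, 2 warnings in 3.45s"); values shown are illustrative.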
# Ensure variables are clean (no whitespace, default to 0 if empty)
TOTAL_TESTS=$(echo "$TOTAL_TESTS" | sed 's/[^0-9]//g')
PASSED_TESTS=$(echo "$PASSED_TESTS" | sed 's/[^0-9]//g')
FAILED_TESTS=$(echo "$FAILED_TESTS" | sed 's/[^0-9]//g')
SKIPPED_TESTS=$(echo "$SKIPPED_TESTS" | sed 's/[^0-9]//g')
WARNINGS=$(echo "$WARNINGS" | sed 's/[^0-9]//g')
# Set defaults if empty after cleaning
TOTAL_TESTS=${TOTAL_TESTS:-0}
PASSED_TESTS=${PASSED_TESTS:-0}
FAILED_TESTS=${FAILED_TESTS:-0}
SKIPPED_TESTS=${SKIPPED_TESTS:-0}
WARNINGS=${WARNINGS:-0}
DURATION=${DURATION:-0.00}
echo "Debug: Parsed values for unit tests:"
echo " TOTAL_TESTS='$TOTAL_TESTS'"
echo " PASSED_TESTS='$PASSED_TESTS'"
echo " FAILED_TESTS='$FAILED_TESTS'"
echo " SKIPPED_TESTS='$SKIPPED_TESTS'"
echo " WARNINGS='$WARNINGS'"
echo " DURATION='$DURATION'"
# Save results as JSON for later aggregation using jq to ensure valid JSON
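# The file written below looks like this (illustrative values):
#   {"python_version":"3.12","test_type":"unit","total":120,"passed":118,"failed":1,"skipped":1,"warnings":2,"duration":"3.45"}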
jq -n \
--arg python_version "${{ matrix.python-version }}" \
--arg test_type "unit" \
--arg total "${TOTAL_TESTS:-0}" \
--arg passed "${PASSED_TESTS:-0}" \
--arg failed "${FAILED_TESTS:-0}" \
--arg skipped "${SKIPPED_TESTS:-0}" \
--arg warnings "${WARNINGS:-0}" \
--arg duration "${DURATION:-0.00}" \
'{
python_version: $python_version,
test_type: $test_type,
total: (if $total == "" then 0 else ($total | tonumber) end),
passed: (if $passed == "" then 0 else ($passed | tonumber) end),
failed: (if $failed == "" then 0 else ($failed | tonumber) end),
skipped: (if $skipped == "" then 0 else ($skipped | tonumber) end),
warnings: (if $warnings == "" then 0 else ($warnings | tonumber) end),
duration: $duration
}' > unit_test_results_${{ matrix.python-version }}.json
# Propagate the real test exit code so failing tests fail this job
exit "$TEST_EXIT_CODE"
- name: Prepare unit test artifacts
if: always()
run: |
# Create artifact directory and copy files that exist
mkdir -p artifact_upload
cp unit_test_results_${{ matrix.python-version }}.json artifact_upload/ || true
cp unit_test_output.log artifact_upload/ || true
if [ -d ".pytest_cache" ]; then
cp -r .pytest_cache artifact_upload/
fi
- name: Upload unit test results
uses: actions/upload-artifact@v4
if: always()
with:
name: unit-test-results-${{ matrix.python-version }}
path: artifact_upload/
retention-days: 7
# Integration tests - run across all Python versions
integration-tests:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.11", "3.12", "3.13"]
fail-fast: false # Continue other versions even if one fails
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install uv
uses: astral-sh/setup-uv@v3
- name: Restore uv cache
uses: actions/cache@v4
with:
path: ~/.cache/uv
key: ${{ runner.os }}-uv-${{ matrix.python-version }}-${{ hashFiles('**/pyproject.toml', '**/uv.lock') }}
restore-keys: |
${{ runner.os }}-uv-${{ matrix.python-version }}-
${{ runner.os }}-uv-
- name: Create virtual environment and install dependencies
run: |
uv sync --frozen
- name: Run integration tests
run: |
# Run tests and capture output for parsing; remember the test exit code,
# since piping into tee would otherwise hide a failure from the job status
./test.sh integration 2>&1 | tee integration_test_output.log
TEST_EXIT_CODE=${PIPESTATUS[0]}
# Extract test results from pytest output
echo "Parsing test results..."
# Get test counts from the output with better parsing
TOTAL_TESTS=$(grep -oE "[0-9]+ selected" integration_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
PASSED_TESTS=$(grep -oE "[0-9]+ passed" integration_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
FAILED_TESTS=$(grep -oE "[0-9]+ failed" integration_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
SKIPPED_TESTS=$(grep -oE "[0-9]+ skipped" integration_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
WARNINGS=$(grep -oE "[0-9]+ warnings?" integration_test_output.log | grep -o "[0-9]*" | head -1 | tr -d '\n' || echo "0")
# Get test duration
DURATION=$(grep -oE "in [0-9]+\.[0-9]+s" integration_test_output.log | grep -o "[0-9]*\.[0-9]*" | head -1 | tr -d '\n' || echo "0.00")
# Ensure variables are clean (no whitespace, default to 0 if empty)
TOTAL_TESTS=$(echo "$TOTAL_TESTS" | sed 's/[^0-9]//g')
PASSED_TESTS=$(echo "$PASSED_TESTS" | sed 's/[^0-9]//g')
FAILED_TESTS=$(echo "$FAILED_TESTS" | sed 's/[^0-9]//g')
SKIPPED_TESTS=$(echo "$SKIPPED_TESTS" | sed 's/[^0-9]//g')
WARNINGS=$(echo "$WARNINGS" | sed 's/[^0-9]//g')
# Set defaults if empty after cleaning
TOTAL_TESTS=${TOTAL_TESTS:-0}
PASSED_TESTS=${PASSED_TESTS:-0}
FAILED_TESTS=${FAILED_TESTS:-0}
SKIPPED_TESTS=${SKIPPED_TESTS:-0}
WARNINGS=${WARNINGS:-0}
DURATION=${DURATION:-0.00}
echo "Debug: Parsed values for integration tests:"
echo " TOTAL_TESTS='$TOTAL_TESTS'"
echo " PASSED_TESTS='$PASSED_TESTS'"
echo " FAILED_TESTS='$FAILED_TESTS'"
echo " SKIPPED_TESTS='$SKIPPED_TESTS'"
echo " WARNINGS='$WARNINGS'"
echo " DURATION='$DURATION'"
# Save results as JSON for later aggregation using jq to ensure valid JSON
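# Same JSON shape as the unit job, with test_type set to "integration".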
jq -n \
--arg python_version "${{ matrix.python-version }}" \
--arg test_type "integration" \
--arg total "${TOTAL_TESTS:-0}" \
--arg passed "${PASSED_TESTS:-0}" \
--arg failed "${FAILED_TESTS:-0}" \
--arg skipped "${SKIPPED_TESTS:-0}" \
--arg warnings "${WARNINGS:-0}" \
--arg duration "${DURATION:-0.00}" \
'{
python_version: $python_version,
test_type: $test_type,
total: (if $total == "" then 0 else ($total | tonumber) end),
passed: (if $passed == "" then 0 else ($passed | tonumber) end),
failed: (if $failed == "" then 0 else ($failed | tonumber) end),
skipped: (if $skipped == "" then 0 else ($skipped | tonumber) end),
warnings: (if $warnings == "" then 0 else ($warnings | tonumber) end),
duration: $duration
}' > integration_test_results_${{ matrix.python-version }}.json
# Propagate the real test exit code so failing tests fail this job
exit "$TEST_EXIT_CODE"
- name: Prepare integration test artifacts
if: always()
run: |
# Create artifact directory and copy files that exist
mkdir -p artifact_upload_integration
cp integration_test_results_${{ matrix.python-version }}.json artifact_upload_integration/ || true
cp integration_test_output.log artifact_upload_integration/ || true
if [ -d ".pytest_cache" ]; then
cp -r .pytest_cache artifact_upload_integration/
fi
if [ -d "htmlcov" ]; then
cp -r htmlcov artifact_upload_integration/
fi
- name: Upload integration test results
uses: actions/upload-artifact@v4
if: always()
with:
name: integration-test-results-${{ matrix.python-version }}
path: artifact_upload_integration/
retention-days: 7
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.12
uses: actions/setup-python@v5
with:
python-version: "3.12"
- name: Install uv
uses: astral-sh/setup-uv@v3
- name: Restore uv cache
uses: actions/cache@v4
with:
path: ~/.cache/uv
key: ${{ runner.os }}-uv-lint-${{ hashFiles('**/pyproject.toml', '**/uv.lock') }}
restore-keys: |
${{ runner.os }}-uv-lint-
${{ runner.os }}-uv-
- name: Create virtual environment and install dependencies
run: |
uv sync --frozen
- name: Run linting
run: |
uv run ruff check src/ tests/ examples/
uv run ruff format --check src/ tests/ examples/
security:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.12
uses: actions/setup-python@v5
with:
python-version: "3.12"
- name: Install uv
uses: astral-sh/setup-uv@v3
- name: Restore uv cache
uses: actions/cache@v4
with:
path: ~/.cache/uv
key: ${{ runner.os }}-uv-security-${{ hashFiles('**/pyproject.toml', '**/uv.lock') }}
restore-keys: |
${{ runner.os }}-uv-security-
${{ runner.os }}-uv-
- name: Create virtual environment and install dependencies
run: |
uv sync --frozen
- name: Run security checks
run: |
uv run bandit -r src/ -f json -o bandit-report.json || true
uv run safety check --json --output safety-report.json || true
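# "|| true" keeps this job green even when bandit/safety report findings;
# results are surfaced through the uploaded reports below rather than by failing CI.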
- name: Upload security reports
uses: actions/upload-artifact@v4
if: always()
with:
name: security-reports
path: |
bandit-report.json
safety-report.json
retention-days: 30
# Test Results Summary - runs after all tests complete
test-summary:
runs-on: ubuntu-latest
needs: [unit-tests, integration-tests, lint, security]
if: always()
steps:
- name: Download all test results
uses: actions/download-artifact@v4
with:
pattern: "*-test-results-*"
merge-multiple: true
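# merge-multiple places the files from every matching artifact into one directory
# (the workspace by default), so the per-version JSON files end up side by side.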
- name: Generate Test Summary
if: always()
run: |
echo "# 🧪 Test Results Summary" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
# Debug: List available files
echo "Available test result files:"
ls -la *test_results*.json || echo "No test result files found"
# Create test results table
echo "## Unit Tests" >> $GITHUB_STEP_SUMMARY
echo "| Python Version | Total Tests | ✅ Passed | ❌ Failed | ⏭️ Skipped | ⚠️ Warnings | ⏱️ Duration |" >> $GITHUB_STEP_SUMMARY
echo "|----------------|-------------|-----------|-----------|------------|-------------|------------|" >> $GITHUB_STEP_SUMMARY
# Process unit test results
UNIT_FILES_FOUND=false
for file in unit_test_results_*.json; do
if [ -f "$file" ]; then
UNIT_FILES_FOUND=true
echo "Processing unit test file: $file"
# Validate JSON first
if ! jq empty "$file" 2>/dev/null; then
echo "Invalid JSON in $file, skipping..."
continue
fi
VERSION=$(jq -r '.python_version // "unknown"' "$file")
TOTAL=$(jq -r '.total // 0' "$file")
PASSED=$(jq -r '.passed // 0' "$file")
FAILED=$(jq -r '.failed // 0' "$file")
SKIPPED=$(jq -r '.skipped // 0' "$file")
WARNINGS=$(jq -r '.warnings // 0' "$file")
DURATION=$(jq -r '.duration // "0.00"' "$file")
# Add status emoji based on results
STATUS_ICON="✅"
if [ "$FAILED" != "0" ] && [ "$FAILED" != "null" ]; then
STATUS_ICON="❌"
fi
echo "| $STATUS_ICON Python $VERSION | $TOTAL | $PASSED | $FAILED | $SKIPPED | $WARNINGS | ${DURATION}s |" >> $GITHUB_STEP_SUMMARY
fi
done
if [ "$UNIT_FILES_FOUND" = false ]; then
echo "| ⚠️ No unit test results found | - | - | - | - | - | - |" >> $GITHUB_STEP_SUMMARY
fi
echo "" >> $GITHUB_STEP_SUMMARY
# Create integration test results table
echo "## Integration Tests" >> $GITHUB_STEP_SUMMARY
echo "| Python Version | Total Tests | ✅ Passed | ❌ Failed | ⏭️ Skipped | ⚠️ Warnings | ⏱️ Duration |" >> $GITHUB_STEP_SUMMARY
echo "|----------------|-------------|-----------|-----------|------------|-------------|------------|" >> $GITHUB_STEP_SUMMARY
# Process integration test results
INTEGRATION_FILES_FOUND=false
for file in integration_test_results_*.json; do
if [ -f "$file" ]; then
INTEGRATION_FILES_FOUND=true
echo "Processing integration test file: $file"
# Validate JSON first
if ! jq empty "$file" 2>/dev/null; then
echo "Invalid JSON in $file, skipping..."
continue
fi
VERSION=$(jq -r '.python_version // "unknown"' "$file")
TOTAL=$(jq -r '.total // 0' "$file")
PASSED=$(jq -r '.passed // 0' "$file")
FAILED=$(jq -r '.failed // 0' "$file")
SKIPPED=$(jq -r '.skipped // 0' "$file")
WARNINGS=$(jq -r '.warnings // 0' "$file")
DURATION=$(jq -r '.duration // "0.00"' "$file")
# Add status emoji based on results
STATUS_ICON="✅"
if [ "$FAILED" != "0" ] && [ "$FAILED" != "null" ]; then
STATUS_ICON="❌"
fi
echo "| $STATUS_ICON Python $VERSION | $TOTAL | $PASSED | $FAILED | $SKIPPED | $WARNINGS | ${DURATION}s |" >> $GITHUB_STEP_SUMMARY
fi
done
if [ "$INTEGRATION_FILES_FOUND" = false ]; then
echo "| ⚠️ No integration test results found | - | - | - | - | - | - |" >> $GITHUB_STEP_SUMMARY
fi
echo "" >> $GITHUB_STEP_SUMMARY
# Calculate totals across all versions and test types
echo "## Overall Summary" >> $GITHUB_STEP_SUMMARY
TOTAL_TESTS=0
TOTAL_PASSED=0
TOTAL_FAILED=0
TOTAL_SKIPPED=0
TOTAL_WARNINGS=0
for file in *_test_results_*.json; do
if [ -f "$file" ]; then
# Validate JSON first
if ! jq empty "$file" 2>/dev/null; then
echo "Invalid JSON in $file, skipping from totals..."
continue
fi
TESTS=$(jq -r '.total // 0' "$file")
PASSED=$(jq -r '.passed // 0' "$file")
FAILED=$(jq -r '.failed // 0' "$file")
SKIPPED=$(jq -r '.skipped // 0' "$file")
WARNINGS=$(jq -r '.warnings // 0' "$file")
# Only add to totals if we got valid numbers
if [[ "$TESTS" =~ ^[0-9]+$ ]]; then
TOTAL_TESTS=$((TOTAL_TESTS + TESTS))
fi
if [[ "$PASSED" =~ ^[0-9]+$ ]]; then
TOTAL_PASSED=$((TOTAL_PASSED + PASSED))
fi
if [[ "$FAILED" =~ ^[0-9]+$ ]]; then
TOTAL_FAILED=$((TOTAL_FAILED + FAILED))
fi
if [[ "$SKIPPED" =~ ^[0-9]+$ ]]; then
TOTAL_SKIPPED=$((TOTAL_SKIPPED + SKIPPED))
fi
if [[ "$WARNINGS" =~ ^[0-9]+$ ]]; then
TOTAL_WARNINGS=$((TOTAL_WARNINGS + WARNINGS))
fi
fi
done
echo "| Metric | Count |" >> $GITHUB_STEP_SUMMARY
echo "|--------|-------|" >> $GITHUB_STEP_SUMMARY
echo "| 📊 Total Tests | $TOTAL_TESTS |" >> $GITHUB_STEP_SUMMARY
echo "| ✅ Passed | $TOTAL_PASSED |" >> $GITHUB_STEP_SUMMARY
echo "| ❌ Failed | $TOTAL_FAILED |" >> $GITHUB_STEP_SUMMARY
echo "| ⏭️ Skipped | $TOTAL_SKIPPED |" >> $GITHUB_STEP_SUMMARY
echo "| ⚠️ Warnings | $TOTAL_WARNINGS |" >> $GITHUB_STEP_SUMMARY
# Add overall status
if [ "$TOTAL_FAILED" -eq 0 ]; then
echo "" >> $GITHUB_STEP_SUMMARY
echo "## 🎉 All Tests Passed!" >> $GITHUB_STEP_SUMMARY
echo "All $TOTAL_TESTS tests passed successfully across all Python versions." >> $GITHUB_STEP_SUMMARY
else
echo "" >> $GITHUB_STEP_SUMMARY
echo "## ⚠️ Some Tests Failed" >> $GITHUB_STEP_SUMMARY
echo "$TOTAL_FAILED out of $TOTAL_TESTS tests failed. Please check the individual job logs for details." >> $GITHUB_STEP_SUMMARY
fi
echo "" >> $GITHUB_STEP_SUMMARY
echo "---" >> $GITHUB_STEP_SUMMARY
echo "*Generated on $(date)* | *Workflow run: ${{ github.run_id }}*" >> $GITHUB_STEP_SUMMARY