Skip to content

Fix comprehensive CI/CD pipeline issues and repository cleanup #68

Fix comprehensive CI/CD pipeline issues and repository cleanup

Fix comprehensive CI/CD pipeline issues and repository cleanup #68

Workflow file for this run

# Comprehensive Documentation Generation Pipeline
# Generates HTML, Markdown, and PDF documentation with proper cleanup
# Display name shown for this workflow in the Actions tab.
name: 'Documentation Generation & Deployment'

# Token scopes granted to every job in this workflow.
permissions:
  # Read access to repository contents; the job checks the repository out.
  contents: read
  # NOTE(review): none of the steps visible in this file obviously needs
  # write access to Actions - confirm whether the local
  # setup-python-env action relies on it before narrowing this.
  actions: write
# Triggers: pushes to the listed branches and all pull requests, in both
# cases only when Python sources, the docs tree, or the docs generator
# script are touched.
on:
  push:
    branches:
      - main
      - develop
      - 'bugfix-*'
    paths:
      - '**.py'
      - 'doc/**'
      - 'scripts/generate-docs.py'
  pull_request:
    paths:
      - '**.py'
      - 'doc/**'
      - 'scripts/generate-docs.py'
# Workflow-wide settings, visible to every step both as the `env` context
# and as shell environment variables.
env:
  # Quoted so YAML keeps it a string instead of parsing it as a float.
  PYTHON_VERSION: '3.11'
  # Sphinx source directory, relative to the repository root.
  DOCS_SOURCE: doc/codeDocs
  # Directory (relative to the workspace) where packaged docs are collected.
  DOCS_OUTPUT: documentation-artifacts
jobs:
  # Single job: builds the Sphinx documentation as HTML, Markdown and
  # (best effort) PDF, packages the results under $DOCS_OUTPUT and uploads
  # them as workflow artifacts.
  build-documentation:
    name: Build Multi-Format Documentation
    runs-on: ubuntu-latest
    outputs:
      # "<UTC date>-<short SHA>", produced by the step with id "version".
      docs-version: ${{ steps.version.outputs.version }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # 0 fetches the complete history instead of a shallow clone.
          fetch-depth: 0

      - name: Setup Python Environment
        # Repository-local action; its inputs are defined in that
        # directory, not in this file.
        uses: ./.github/actions/setup-python-env
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          install-dev-reqs: 'false'
          install-docs-reqs: 'true'

      - name: Install system dependencies
        run: |
          sudo apt-get update -qq
          # texlive-xetex provides the xelatex engine that the pandoc
          # fallback in "Build PDF documentation" asks for.
          sudo apt-get install -y --no-install-recommends \
            graphviz \
            pandoc \
            texlive-latex-recommended \
            texlive-fonts-recommended \
            texlive-latex-extra \
            texlive-xetex \
            latexmk

      - name: Set documentation version
        id: version
        run: |
          # UTC date so the version agrees with the other (UTC) timestamps.
          VERSION="$(date -u +'%Y.%m.%d')-$(git rev-parse --short HEAD)"
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
          echo "Documentation version: $VERSION"

      - name: Clean previous builds
        run: |
          echo "🧹 Cleaning previous documentation builds..."
          rm -rf "$DOCS_SOURCE/_build/" || true
          rm -rf "$DOCS_SOURCE"/_static/diagrams/*.png || true
          rm -rf "$DOCS_SOURCE"/_static/diagrams/*.dot || true
          # Drop every .rst except the two hand-maintained entry pages.
          find "$DOCS_SOURCE/" -name '*.rst' -not -name 'index.rst' -not -name 'overview.rst' -delete || true
          mkdir -p "$DOCS_OUTPUT"

      - name: Build comprehensive documentation
        env:
          # NOTE(review): this is "documentation-output", not $DOCS_OUTPUT
          # ("documentation-artifacts") - presumably the directory that
          # scripts/build-docs.sh writes to; confirm against that script.
          MANIFEST_PATH: documentation-output/manifest.json
        run: |
          cd "$GITHUB_WORKSPACE"
          ./scripts/build-docs.sh
          # Update manifest with CI information
          if [ ! -f "$MANIFEST_PATH" ]; then
            exit 0
          fi
          # Values are read from the runner-provided environment rather
          # than interpolated into the source, so a crafted ref name can
          # never be executed as code. The heredoc body must stay at the
          # script's base indentation.
          python3 - << 'PY'
          import json
          import os

          path = os.environ["MANIFEST_PATH"]
          with open(path, encoding="utf-8") as fh:
              manifest = json.load(fh)
          manifest.update({
              "ci_run_number": os.environ["GITHUB_RUN_NUMBER"],
              "ci_sha": os.environ["GITHUB_SHA"],
              "ci_ref": os.environ["GITHUB_REF"],
              "repository": os.environ["GITHUB_REPOSITORY"],
          })
          with open(path, "w", encoding="utf-8") as fh:
              json.dump(manifest, fh, indent=2)
          PY

      - name: Build HTML documentation
        run: |
          echo "🌐 Building HTML documentation..."
          cd "$DOCS_SOURCE"
          # -W turns Sphinx warnings into errors for this builder.
          sphinx-build -W -b html . _build/html
          echo "HTML documentation built successfully"

      - name: Build Markdown documentation
        env:
          DOCS_VERSION: ${{ steps.version.outputs.version }}
        run: |
          echo "📝 Building Markdown documentation..."
          cd "$DOCS_SOURCE"
          sphinx-build -b markdown . _build/markdown
          # Work inside the output directory so the links written into
          # README.md are relative to README.md itself.
          cd _build/markdown
          # Create comprehensive README. The heredoc is unquoted on
          # purpose: $(date ...) and the variables must be expanded.
          cat > README.md << EOF
          # unstructuredDataHandler Documentation

          **Version:** ${DOCS_VERSION}

          **Generated:** $(date -u '+%Y-%m-%d %H:%M:%S UTC')

          **Repository:** ${GITHUB_REPOSITORY}

          **Branch:** ${GITHUB_REF_NAME}

          This directory contains the complete documentation in Markdown format.

          ## Navigation

          - [Main Documentation](index.md) - Start here
          - [System Overview](overview.md) - Architecture and design
          - [API Reference](modules.md) - Complete API documentation

          ## Module Documentation

          EOF
          # One bullet per generated page, sorted by path.
          find . -name '*.md' -not -name 'README.md' | sort | while IFS= read -r file; do
            rel="${file#./}"
            title="$(basename "$rel" .md)"
            echo "- [$title]($rel)"
          done >> README.md
          echo "Markdown documentation built successfully"

      - name: Build PDF documentation
        run: |
          echo "📄 Building PDF documentation..."
          cd "$DOCS_SOURCE"
          sphinx-build -b latex . _build/latex
          cd _build/latex
          # Build PDF with error handling: a LaTeX failure must not fail
          # the job, PDF output is best effort.
          make all-pdf || {
            echo "⚠️ PDF generation failed, creating fallback PDF from HTML"
            cd ../html
            # Fallback: convert the HTML landing page to PDF using pandoc.
            # The result lands in _build/, where the packaging step
            # looks for it.
            pandoc index.html -o ../unstructuredDataHandler-docs.pdf --pdf-engine=xelatex || \
              echo "⚠️ PDF generation skipped - LaTeX not fully configured"
          }
          echo "PDF documentation processing completed"

      - name: Package documentation artifacts
        env:
          DOCS_VERSION: ${{ steps.version.outputs.version }}
        run: |
          echo "📦 Packaging documentation artifacts..."
          out_dir="$GITHUB_WORKSPACE/$DOCS_OUTPUT"
          mkdir -p "$out_dir"
          cd "$DOCS_SOURCE/_build"

          # HTML Documentation
          if [ -d "html" ]; then
            tar -czf "$out_dir/html-docs-${DOCS_VERSION}.tar.gz" -C html .
            # Copy the directory contents so an existing destination
            # never ends up with a nested html/html.
            mkdir -p "$out_dir/html"
            cp -r html/. "$out_dir/html/"
          fi

          # Markdown Documentation
          if [ -d "markdown" ]; then
            tar -czf "$out_dir/markdown-docs-${DOCS_VERSION}.tar.gz" -C markdown .
            mkdir -p "$out_dir/markdown"
            cp -r markdown/. "$out_dir/markdown/"
          fi

          # PDF Documentation: both the LaTeX build and the pandoc
          # fallback are published under the same versioned name, so the
          # manifest entry always matches the file that exists.
          # NOTE(review): the LaTeX output file name presumably follows
          # the Sphinx project configuration - confirm against conf.py.
          pdf_name="unstructuredDataHandler-docs-${DOCS_VERSION}.pdf"
          pdf_entry="null"
          if [ -f "latex/unstructureddatahandler.pdf" ]; then
            cp "latex/unstructureddatahandler.pdf" "$out_dir/$pdf_name"
            pdf_entry="\"$pdf_name\""
          elif [ -f "unstructuredDataHandler-docs.pdf" ]; then
            cp "unstructuredDataHandler-docs.pdf" "$out_dir/$pdf_name"
            pdf_entry="\"$pdf_name\""
          fi

          # Create manifest ("pdf" is JSON null when no PDF was produced).
          cat > "$out_dir/manifest.json" << EOF
          {
            "version": "${DOCS_VERSION}",
            "generated_at": "$(date -u -Iseconds)",
            "repository": "${GITHUB_REPOSITORY}",
            "branch": "${GITHUB_REF_NAME}",
            "commit": "${GITHUB_SHA}",
            "formats": {
              "html": "html/",
              "markdown": "markdown/",
              "pdf": ${pdf_entry}
            }
          }
          EOF

          echo "📊 Documentation packaging summary:"
          ls -la "$out_dir/"

      - name: Cleanup temporary files
        run: |
          echo "🧹 Cleaning up temporary files..."
          cd "$DOCS_SOURCE"
          # Remove build artifacts but keep source
          rm -rf _build/doctrees/ || true
          rm -rf _build/latex/*.aux _build/latex/*.log _build/latex/*.out _build/latex/*.toc || true
          find _static/diagrams/ -name '*.dot' -delete || true
          # Clean Python cache; -prune stops find from descending into a
          # directory it is about to remove.
          find . -type d -name '__pycache__' -prune -exec rm -rf {} + || true
          find . -name '*.pyc' -delete || true
          echo "Cleanup completed"

      - name: Upload HTML Documentation
        uses: actions/upload-artifact@v4
        with:
          name: html-documentation
          path: ${{ env.DOCS_OUTPUT }}/html/
          retention-days: 90

      - name: Upload Markdown Documentation
        uses: actions/upload-artifact@v4
        with:
          name: markdown-documentation
          path: ${{ env.DOCS_OUTPUT }}/markdown/
          retention-days: 90

      - name: Upload PDF Documentation
        # Only runs when the packaging step actually produced a PDF.
        if: hashFiles(format('{0}/*.pdf', env.DOCS_OUTPUT)) != ''
        uses: actions/upload-artifact@v4
        with:
          name: pdf-documentation
          path: ${{ env.DOCS_OUTPUT }}/*.pdf
          retention-days: 90

      - name: Upload Complete Documentation Archive
        uses: actions/upload-artifact@v4
        with:
          name: complete-documentation-${{ steps.version.outputs.version }}
          path: ${{ env.DOCS_OUTPUT }}/
          retention-days: 90