|
19 | 19 | description: "Pull request number or branch name" |
20 | 20 | required: true |
21 | 21 | default: "main" |
| 22 | + python_base_image: |
| 23 | + description: "Override PYTHON_BASE_IMAGE (leave empty to use default)" |
| 24 | + required: false |
| 25 | + default: "" |
| 26 | + docling_base_image: |
| 27 | + description: "Override DOCLING_BASE_IMAGE (leave empty to use default)" |
| 28 | + required: false |
| 29 | + default: "" |
# Workflow-wide environment variables, available to every job below.
env:
  # Registry host passed to the Quay login step in test-local-pipelines.
  QUAY_REGISTRY: quay.io
  # EC2 instance type requested when the self-hosted runner is started.
  INSTANCE_TYPE: "g6e.xlarge"
22 | 33 |
|
jobs:
  # This job always runs and provides clear feedback to contributors
  pr-check:
    runs-on: ubuntu-latest
    steps:
      - name: PR Check
        run: |
          echo "✅ PR received!"
          echo ""
          if [ "${{ github.repository }}" != "opendatahub-io/data-processing" ]; then
            echo "ℹ️ Note: Full CI tests (EC2 runners, pipeline tests) only run on PRs to opendatahub-io/data-processing."
            echo " Your PR will be tested automatically once submitted to the upstream repository's branch."
          else
            echo "🚀 Running full CI tests on upstream repository..."
          fi

  # Starts a self-hosted runner on EC2 (upstream repository only) and exposes
  # its runner label and instance id to the jobs that depend on it.
  launch-ec2-runner:
    if: github.repository == 'opendatahub-io/data-processing'
    runs-on: ubuntu-latest
    permissions:
      id-token: write  # This is required for OIDC (AWS auth)
      contents: read
    outputs:
      label: ${{ steps.start-ec2-runner.outputs.label }}
      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v6

      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708  # v5.1.1
        with:
          role-to-assume: "arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/${{ vars.DATA_PROCESSING_IAM_ROLE }}"
          aws-region: us-east-2
          role-session-name: odh-data-processing  # For tracking in CloudTrail

      - name: Start Data Processing EC2 runner
        id: start-ec2-runner
        uses: machulav/ec2-github-runner@a6dbcefcf8a31a861f5e078bb153ed332130c512  # v2.4.3
        with:
          mode: start
          github-token: "${{ secrets.DATA_PROCESSING_GH_PERSONAL_ACCESS_TOKEN }}"
          ec2-instance-type: "${{ env.INSTANCE_TYPE }}"
          # One candidate per subnet (2A/2B/2C); all share the same AMI and
          # security group.
          availability-zones-config: >
            [
              {"imageId": "${{ vars.US_EAST_2_AMI_ID }}", "subnetId": "${{ vars.US_EAST_2A_SUBNET_ID }}", "securityGroupId": "${{ vars.US_EAST_2_SG_ID }}"},
              {"imageId": "${{ vars.US_EAST_2_AMI_ID }}", "subnetId": "${{ vars.US_EAST_2B_SUBNET_ID }}", "securityGroupId": "${{ vars.US_EAST_2_SG_ID }}"},
              {"imageId": "${{ vars.US_EAST_2_AMI_ID }}", "subnetId": "${{ vars.US_EAST_2C_SUBNET_ID }}", "securityGroupId": "${{ vars.US_EAST_2_SG_ID }}"}
            ]
          iam-role-name: "${{ vars.DATA_PROCESSING_IAM_ROLE }}"
          aws-resource-tags: >
            [
              {"Key": "Name", "Value": "data-processing-gh-runner"},
              {"Key": "GitHubRepository", "Value": "${{ github.repository }}"},
              {"Key": "GitHubRef", "Value": "${{ github.ref }}"},
              {"Key": "GitHubPR", "Value": "${{ github.event.number }}"}
            ]

  # Runs each pipeline's local_run.py on the EC2 runner started above.
  test-local-pipelines:
    if: github.repository == 'opendatahub-io/data-processing'
    needs:
      - launch-ec2-runner
    runs-on: ${{ needs.launch-ec2-runner.outputs.label }}
    strategy:
      fail-fast: false
      matrix:
        pipeline:
          - docling-standard
          - docling-vlm

    steps:
      - name: Setup Environment
        run: echo "Running on EC2 ${{ needs.launch-ec2-runner.outputs.ec2-instance-id }}"

      - uses: actions/checkout@v4

      - name: Setup System Dependencies (Python + Docker)
        run: |
          # Install Python
          sudo dnf install -y python3.11 python3-pip

          # Install Docker CE from official repository
          sudo dnf config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo
          sudo dnf install -y docker-ce docker-ce-cli containerd.io

          # Start Docker daemon
          sudo systemctl start docker
          sudo systemctl enable docker

          # Add current user to docker group
          sudo usermod -aG docker $(whoami)

          # The group change above does not reach the already-running session,
          # so make the Docker socket read/write for all users instead.
          sudo chmod 666 /var/run/docker.sock

      - name: Install minimal requirements
        run: |
          # Setup Pip
          /usr/bin/python3.11 -m ensurepip --upgrade >/dev/null 2>&1
          /usr/bin/python3.11 -m pip install --upgrade pip

          # Install the packages needed to drive the local pipeline run
          /usr/bin/python3.11 -m pip install docker kfp==2.14.6

      - name: Create output directory
        working-directory: kubeflow-pipelines/${{ matrix.pipeline }}
        run: |
          mkdir -p local_outputs
          chmod 777 local_outputs

      - name: Log in to Quay Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.QUAY_REGISTRY }}
          username: ${{ secrets.QUAY_USERNAME }}
          password: ${{ secrets.QUAY_PASSWORD }}

      - name: Run local pipeline
        timeout-minutes: 15
        working-directory: kubeflow-pipelines/${{ matrix.pipeline }}
        # The workflow_dispatch inputs are user-controlled. They are handed to
        # the script through environment variables rather than expanded into
        # the script text, so their contents cannot be executed as shell code.
        env:
          PYTHON_BASE_IMAGE_OVERRIDE: ${{ github.event.inputs.python_base_image }}
          DOCLING_BASE_IMAGE_OVERRIDE: ${{ github.event.inputs.docling_base_image }}
        run: |
          # Only set env vars if inputs are provided (non-empty)
          if [ -n "${PYTHON_BASE_IMAGE_OVERRIDE}" ]; then
            export PYTHON_BASE_IMAGE="${PYTHON_BASE_IMAGE_OVERRIDE}"
          fi
          if [ -n "${DOCLING_BASE_IMAGE_OVERRIDE}" ]; then
            export DOCLING_BASE_IMAGE="${DOCLING_BASE_IMAGE_OVERRIDE}"
          fi
          /usr/bin/python3.11 local_run.py

      - name: Upload logs on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.pipeline }}-logs
          path: kubeflow-pipelines/${{ matrix.pipeline }}/local_outputs/
          retention-days: 7

  # Stops the EC2 runner. always() makes this job run even when the jobs it
  # needs failed or were cancelled.
  stop-ec2-runner:
    if: github.repository == 'opendatahub-io/data-processing' && always()
    permissions:
      id-token: write  # This is required for OIDC (AWS auth)
      contents: read
    needs:
      - launch-ec2-runner
      - test-local-pipelines

    runs-on: ubuntu-latest
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@61815dcd50bd041e203e49132bacad1fd04d2708  # v5.1.1
        with:
          role-to-assume: "arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/${{ vars.DATA_PROCESSING_IAM_ROLE }}"
          aws-region: us-east-2
          role-session-name: odh-data-processing  # For tracking in CloudTrail

      - name: Stop EC2 runner
        uses: machulav/ec2-github-runner@a6dbcefcf8a31a861f5e078bb153ed332130c512  # v2.4.3
        with:
          mode: stop
          github-token: "${{ secrets.DATA_PROCESSING_GH_PERSONAL_ACCESS_TOKEN }}"
          label: ${{ needs.launch-ec2-runner.outputs.label }}
          ec2-instance-id: ${{ needs.launch-ec2-runner.outputs.ec2-instance-id }}
| 192 | + |
0 commit comments