Add MLflow support and expose logging configuration in TrainingArgs #287
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-License-Identifier: Apache-2.0

# Smoke-test workflow. It runs in three stages:
#   1. start-large-ec2-runner: launch a self-hosted GPU runner on EC2.
#   2. run-smoke-tests: run the smoke suite on that runner.
#   3. stop-large-ec2-runner: stop the runner again, whatever the outcome.
name: "Run smoke tests via Tox::pytest (python 3.12)"

# These tests will be long running and require accelerated hardware.

on:
  workflow_dispatch:
    inputs:
      # Branch to check out when the workflow is started manually.
      branch:
        type: string
        default: main
  # using this rather than pull_request because this workflow
  # needs to run in the context of the base branch (main) and
  # access the repo's secrets to start the AWS instances.
  pull_request_target:
    branches:
      - main
      - release-*
    paths:
      # note this should match the merging criteria in 'mergify.yml'
      - "**.py"
      - "tox.ini"
      - "pyproject.toml"
      - "requirements-dev.txt"
      - "requirements-cuda.txt"
      - "constraints-dev.txt"

# Default token permissions for every job; run-smoke-tests narrows them further.
permissions:
  contents: read

defaults:
  run:
    shell: bash

env:
  # Single source of truth for the EC2 instance type used by the launch step.
  ec2_runner_variant: "g6e.12xlarge"  # 4x L40s

jobs:
  # Launches the EC2 instance that backs the self-hosted runner and publishes
  # its runner label, instance id and region for the downstream jobs.
  start-large-ec2-runner:
    runs-on: ubuntu-latest
    outputs:
      label: ${{ steps.launch-ec2-instance-with-fallback.outputs.label }}
      ec2-instance-id: ${{ steps.launch-ec2-instance-with-fallback.outputs.ec2-instance-id }}
      ec2-instance-region: ${{ steps.launch-ec2-instance-with-fallback.outputs.ec2-instance-region }}
    steps:
      - name: Checkout "launch-ec2-runner-with-fallback" in-house CI action
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
        with:
          repository: instructlab/ci-actions
          # clone the "ci-actions" repo to a local directory called "ci-actions",
          # instead of overwriting the current WORKDIR contents
          path: ci-actions
          ref: release-v0.1
          # Only the one action directory is needed from that repository.
          sparse-checkout: |
            actions/launch-ec2-runner-with-fallback

      - name: Launch EC2 Runner with Fallback
        id: launch-ec2-instance-with-fallback
        uses: ./ci-actions/actions/launch-ec2-runner-with-fallback
        env:
          TMPDIR: "/tmp"
        with:
          aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          github_token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          # JSON list of candidate regions, each with its subnets, AMI and
          # security group.
          regions_config: >
            [
              {
                "region": "us-east-2",
                "subnets": {
                  "us-east-2a": "${{ vars.SUBNET_US_EAST_2A }}",
                  "us-east-2b": "${{ vars.SUBNET_US_EAST_2B }}",
                  "us-east-2c": "${{ vars.SUBNET_US_EAST_2C }}"
                },
                "ec2-ami": "${{ vars.AWS_EC2_AMI_US_EAST_2 }}",
                "security-group-id": "${{ vars.SECURITY_GROUP_ID_US_EAST_2 }}"
              },
              {
                "region": "us-east-1",
                "subnets": {
                  "us-east-1a": "${{ vars.SUBNET_US_EAST_1A }}",
                  "us-east-1b": "${{ vars.SUBNET_US_EAST_1B }}",
                  "us-east-1c": "${{ vars.SUBNET_US_EAST_1C }}",
                  "us-east-1d": "${{ vars.SUBNET_US_EAST_1D }}",
                  "us-east-1e": "${{ vars.SUBNET_US_EAST_1E }}",
                  "us-east-1f": "${{ vars.SUBNET_US_EAST_1F }}"
                },
                "ec2-ami": "${{ vars.AWS_EC2_AMI_US_EAST_1 }}",
                "security-group-id": "${{ vars.SECURITY_GROUP_ID_US_EAST_1 }}"
              }
            ]
          try_spot_instance_first: false
          # Read from the workflow-level env so the instance type is declared
          # in exactly one place.
          ec2_instance_type: ${{ env.ec2_runner_variant }}
          aws_resource_tags: >
            [
              {"Key": "Name", "Value": "instructlab-training-ci-github-large-runner"},
              {"Key": "GitHubRepository", "Value": "${{ github.repository }}"},
              {"Key": "GitHubRef", "Value": "${{ github.ref }}"},
              {"Key": "GitHubPR", "Value": "${{ github.event.number }}"}
            ]

  # Runs the smoke tests on the runner started by the previous job.
  run-smoke-tests:
    needs:
      - start-large-ec2-runner
    runs-on: ${{ needs.start-large-ec2-runner.outputs.label }}
    # It is important that this job has no write permissions and has
    # no access to any secrets. This part is where we are running
    # untrusted code from PRs.
    permissions: {}
    steps:
      - name: "Checkout code"
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
        with:
          fetch-depth: 0
          # `inputs.branch` is only set for workflow_dispatch runs; on
          # pull_request_target it is empty and checkout uses its default ref.
          # NOTE(review): confirm that ./.github/actions/run-smoke is what
          # fetches the PR's code in that case.
          ref: ${{ inputs.branch }}

      - name: Run smoke tests
        uses: ./.github/actions/run-smoke
        with:
          # Quoted so the version stays a string (e.g. "3.10" must not
          # become the float 3.1).
          python-version: "3.12"

  # Stops the EC2 runner. `always()` makes this job run even when an earlier
  # job failed or was cancelled, so the instance is not left running.
  stop-large-ec2-runner:
    needs:
      - start-large-ec2-runner
      - run-smoke-tests
    runs-on: ubuntu-latest
    if: ${{ always() }}
    steps:
      - name: "Configure AWS credentials"
        uses: "aws-actions/configure-aws-credentials@b47578312673ae6fa5b5096b330d9fbac3d116df"  # v4.2.1
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          # Region in which the start job actually launched the instance.
          aws-region: ${{ needs.start-large-ec2-runner.outputs.ec2-instance-region }}

      - name: "Stop EC2 runner"
        uses: machulav/ec2-github-runner@fb91019e71385fb10dfcbec812b4de8c61589f7b  # v2.4.1
        with:
          mode: stop
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          label: ${{ needs.start-large-ec2-runner.outputs.label }}
          ec2-instance-id: ${{ needs.start-large-ec2-runner.outputs.ec2-instance-id }}