Run smoke tests via Tox::pytest (python 3.12)

Add MLflow support and expose logging configuration in TrainingArgs #287

Workflow file for this run

.github/workflows/smoke-py312.yaml at 4fe3ab2

	# SPDX-License-Identifier: Apache-2.0

	# Workflow title.
	# NOTE: these tests are long-running and need accelerated hardware.
	name: 'Run smoke tests via Tox::pytest (python 3.12)'

	# Triggers: manual dispatch, or pull requests targeting main / release-*.
	on:
	  # Manual run; the `branch` input defaults to main.
	  workflow_dispatch:
	    inputs:
	      branch:
	        type: string
	        default: main
	  # pull_request_target (rather than pull_request) is deliberate: this
	  # workflow has to run in the context of the base branch (main) so that
	  # it can access the repo's secrets to start the AWS instances.
	  pull_request_target:
	    branches: [main, 'release-*']
	    paths:
	      # Keep this list in sync with the merging criteria in 'mergify.yml'.
	      - '**.py'
	      - 'tox.ini'
	      - 'pyproject.toml'
	      - 'requirements-dev.txt'
	      - 'requirements-cuda.txt'
	      - 'constraints-dev.txt'

	# Workflow-wide token scope: read-only access to repository contents.
	permissions:
	  contents: read

	# Shell used by `run:` steps.
	defaults:
	  run:
	    shell: bash

	env:
	  # g6e.12xlarge => 4x L40s
	  ec2_runner_variant: 'g6e.12xlarge'

	jobs:
	  # Job 1: launch the EC2 instance that will act as the self-hosted runner
	  # and publish its runner label, instance id and region as job outputs.
	  start-large-ec2-runner:
	    runs-on: ubuntu-latest
	    outputs:
	      label: ${{ steps.launch-ec2-instance-with-fallback.outputs.label }}
	      ec2-instance-id: ${{ steps.launch-ec2-instance-with-fallback.outputs.ec2-instance-id }}
	      ec2-instance-region: ${{ steps.launch-ec2-instance-with-fallback.outputs.ec2-instance-region }}
	    steps:
	      - name: Checkout "launch-ec2-runner-with-fallback" in-house CI action
	        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
	        with:
	          repository: instructlab/ci-actions
	          # clone the "ci-actions" repo to a local directory called "ci-actions",
	          # instead of overwriting the current WORKDIR contents
	          path: ci-actions
	          ref: release-v0.1
	          # Literal block scalar: one sparse-checkout pattern per line.
	          sparse-checkout: |
	            actions/launch-ec2-runner-with-fallback

	      - name: Launch EC2 Runner with Fallback
	        id: launch-ec2-instance-with-fallback
	        uses: ./ci-actions/actions/launch-ec2-runner-with-fallback
	        env:
	          TMPDIR: "/tmp"
	        with:
	          aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
	          aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
	          github_token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
	          # JSON list of regions, each with its subnets, AMI and security group.
	          regions_config: >
	            [
	              {
	                "region": "us-east-2",
	                "subnets": {
	                  "us-east-2a": "${{ vars.SUBNET_US_EAST_2A }}",
	                  "us-east-2b": "${{ vars.SUBNET_US_EAST_2B }}",
	                  "us-east-2c": "${{ vars.SUBNET_US_EAST_2C }}"
	                },
	                "ec2-ami": "${{ vars.AWS_EC2_AMI_US_EAST_2 }}",
	                "security-group-id": "${{ vars.SECURITY_GROUP_ID_US_EAST_2 }}"
	              },
	              {
	                "region": "us-east-1",
	                "subnets": {
	                  "us-east-1a": "${{ vars.SUBNET_US_EAST_1A }}",
	                  "us-east-1b": "${{ vars.SUBNET_US_EAST_1B }}",
	                  "us-east-1c": "${{ vars.SUBNET_US_EAST_1C }}",
	                  "us-east-1d": "${{ vars.SUBNET_US_EAST_1D }}",
	                  "us-east-1e": "${{ vars.SUBNET_US_EAST_1E }}",
	                  "us-east-1f": "${{ vars.SUBNET_US_EAST_1F }}"
	                },
	                "ec2-ami": "${{ vars.AWS_EC2_AMI_US_EAST_1 }}",
	                "security-group-id": "${{ vars.SECURITY_GROUP_ID_US_EAST_1 }}"
	              }
	            ]
	          try_spot_instance_first: false
	          # Single source of truth: the workflow-level `env.ec2_runner_variant`.
	          ec2_instance_type: ${{ env.ec2_runner_variant }}
	          aws_resource_tags: >
	            [
	              {"Key": "Name", "Value": "instructlab-training-ci-github-large-runner"},
	              {"Key": "GitHubRepository", "Value": "${{ github.repository }}"},
	              {"Key": "GitHubRef", "Value": "${{ github.ref }}"},
	              {"Key": "GitHubPR", "Value": "${{ github.event.number }}"}
	            ]

	  # Job 2: run the smoke tests on the runner started by the previous job.
	  run-smoke-tests:
	    needs:
	      - start-large-ec2-runner
	    runs-on: ${{ needs.start-large-ec2-runner.outputs.label }}
	    # It is important that this job has no write permissions and has
	    # no access to any secrets. This part is where we are running
	    # untrusted code from PRs.
	    permissions: {}
	    steps:
	      - name: "Checkout code"
	        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
	        with:
	          fetch-depth: 0
	          # NOTE(review): `inputs.branch` is only populated for
	          # workflow_dispatch runs; on pull_request_target it is empty and
	          # checkout falls back to its default ref. Confirm that this is
	          # the ref the smoke tests are meant to exercise.
	          ref: ${{ inputs.branch }}

	      - name: Run smoke tests
	        uses: ./.github/actions/run-smoke
	        with:
	          # Quoted so the version stays a string, not a YAML float.
	          python-version: "3.12"

	  # Job 3: stop the EC2 runner. `always()` makes this run even when the
	  # earlier jobs failed or were cancelled.
	  stop-large-ec2-runner:
	    needs:
	      - start-large-ec2-runner
	      - run-smoke-tests
	    runs-on: ubuntu-latest
	    if: ${{ always() }}
	    steps:
	      - name: "Configure AWS credentials"
	        uses: "aws-actions/configure-aws-credentials@b47578312673ae6fa5b5096b330d9fbac3d116df"  # v4.2.1
	        with:
	          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
	          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
	          aws-region: ${{ needs.start-large-ec2-runner.outputs.ec2-instance-region }}

	      - name: "Stop EC2 runner"
	        uses: machulav/ec2-github-runner@fb91019e71385fb10dfcbec812b4de8c61589f7b  # v2.4.1
	        with:
	          mode: stop
	          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
	          label: ${{ needs.start-large-ec2-runner.outputs.label }}
	          ec2-instance-id: ${{ needs.start-large-ec2-runner.outputs.ec2-instance-id }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add MLflow support and expose logging configuration in TrainingArgs #287

Workflow file

Add MLflow support and expose logging configuration in TrainingArgs #287

Uh oh!

Workflow file for this run