
Add check for rocminfo in the installation script to replace the nightly URL with the appropriate ROCm whl. #2160
name: Run the aoti runner with CUDA using stories

on:
  pull_request:
  push:
    branches:
      - main
  workflow_dispatch:

jobs:
  test-runner-aot-cuda:
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    with:
      runner: linux.g5.4xlarge.nvidia.gpu
      secrets-env: "HF_TOKEN_PERIODIC"
      gpu-arch-type: cuda
      gpu-arch-version: "12.1"
      timeout: 60
      script: |
        echo "::group::Print machine info"
        uname -a
        echo "::endgroup::"

        echo "::group::Install newer objcopy that supports --set-section-alignment"
        yum install -y devtoolset-10-binutils
        export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
        echo "::endgroup::"
echo "::group::Download checkpoints"
# Install requirements
./install/install_requirements.sh cuda
bash torchchat/utils/scripts/build_native.sh aoti
pip3 list
python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
echo "::endgroup::"
echo "::group::Download checkpoints"
mkdir -p checkpoints/stories15M
pushd checkpoints/stories15M
wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
popd
echo "::endgroup::"
echo "::group::Run inference"
export MODEL_PATH=checkpoints/stories15M/stories15M.pt
export MODEL_NAME=stories15M
export MODEL_DIR=/tmp
set -eou pipefail
export MODEL_DIR=${PWD}/checkpoints/stories15M
export PROMPT="Once upon a time in a land far away"
for DTYPE in bfloat16; do
python torchchat.py generate --dtype ${DTYPE} --checkpoint-path ${MODEL_DIR}/stories15M.pt --temperature 0 --prompt "${PROMPT}" --device cuda
python torchchat.py export --checkpoint-path ${MODEL_DIR}/stories15M.pt --output-dso-path /tmp/model.so
./cmake-out/aoti_run /tmp/model.so -d CUDA -z ${MODEL_DIR}/tokenizer.model -i "${PROMPT}"
done
echo "tests complete"
echo "******************************************"
echo "::endgroup::"