Add check for rocminfo in the installation script to replace the nightly URL with the appropriate ROCm whl. #3087
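
The change described in the title amounts to probing for the `rocminfo` binary before choosing which PyTorch nightly wheel index the install script points at. The sketch below is only an illustration of that idea; the variable name and the exact wheel-index URLs are assumptions, not lines taken from this PR's actual install_requirements.sh diff:

    # Illustrative sketch, not the PR's actual diff: pick the nightly wheel
    # index based on whether an AMD ROCm stack is present on the machine.
    if command -v rocminfo &> /dev/null; then
      # rocminfo is installed, so fetch ROCm builds of the nightly wheels
      # (the rocm6.2 suffix is an assumed example version).
      TORCH_NIGHTLY_URL="https://download.pytorch.org/whl/nightly/rocm6.2"
    else
      # No ROCm stack detected: keep the default nightly URL.
      TORCH_NIGHTLY_URL="https://download.pytorch.org/whl/nightly/cu121"
    fi
    pip3 install --pre torch --index-url "${TORCH_NIGHTLY_URL}"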

Workflow file for this run

name: pull
on:
  pull_request:
  push:
    branches:
      - main
  workflow_dispatch:
jobs:
  gather-models-cpu:
    runs-on: ubuntu-22.04
    outputs:
      models: ${{ steps.gather-models-cpu.outputs.models }}
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: 'false'
      - uses: actions/setup-python@v4
        with:
          python-version: '3.10.11'
      - name: Extract the list of models to run on CPU
        id: gather-models-cpu
        run: |
          set -eux
          PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --event "pull_request" --backend "cpu"
  test-cpu-compile:
    name: test-cpu-compile (${{ matrix.platform }}, ${{ matrix.model_name }})
    needs: gather-models-cpu
    strategy:
      matrix: ${{ fromJSON(needs.gather-models-cpu.outputs.models) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    env:
      TORCHCHAT_ROOT: ${{ github.workspace }}
      REPO_NAME: ${{ matrix.repo_name }}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10.11'
      - name: Print machine info
        run: |
          echo "$(uname -a)"
      - name: Install dependencies
        run: |
          ./install/install_requirements.sh
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
      - name: Download checkpoints
        run: |
          bash ${TORCHCHAT_ROOT}/.ci/scripts/wget_checkpoint.sh ${{ matrix.repo_name }} "${{ matrix.resources }}"
      - name: Run validation
        run: |
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
          pushd ${TORCHCHAT_ROOT}
          bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
          bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "compile"
  test-cpu-aoti:
    name: test-cpu-aoti (${{ matrix.platform }}, ${{ matrix.model_name }})
    needs: gather-models-cpu
    strategy:
      matrix: ${{ fromJSON(needs.gather-models-cpu.outputs.models) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    env:
      TORCHCHAT_ROOT: ${{ github.workspace }}
      REPO_NAME: ${{ matrix.repo_name }}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10.11'
      - name: Print machine info
        run: |
          echo "$(uname -a)"
      - name: Install dependencies
        run: |
          ./install/install_requirements.sh
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
      - name: Download checkpoints
        run: |
          bash ${TORCHCHAT_ROOT}/.ci/scripts/wget_checkpoint.sh ${{ matrix.repo_name }} "${{ matrix.resources }}"
      - name: Run validation
        run: |
          pushd ${TORCHCHAT_ROOT}
          bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
          bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "aoti"
  test-cpu-eval-sanity-check:
    name: test-cpu-eval-sanity-check (${{ matrix.platform }}, ${{ matrix.model_name }})
    needs: gather-models-cpu
    strategy:
      matrix: ${{ fromJSON(needs.gather-models-cpu.outputs.models) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    env:
      TORCHCHAT_ROOT: ${{ github.workspace }}
      REPO_NAME: ${{ matrix.repo_name }}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10.11'
      - name: Print machine info
        run: |
          echo "$(uname -a)"
      - name: Install dependencies
        run: |
          ./install/install_requirements.sh
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
      - name: Download checkpoints
        run: |
          bash ${TORCHCHAT_ROOT}/.ci/scripts/wget_checkpoint.sh ${{ matrix.repo_name }} "${{ matrix.resources }}"
      - name: Run validation
        run: |
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
          pushd ${TORCHCHAT_ROOT}
          bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
          bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "eval_sanity_check-bfloat16"
  test-cpu-eval-sanity-check-float16:
    name: test-cpu-eval-sanity-check-float16 (${{ matrix.platform }}, ${{ matrix.model_name }})
    needs: gather-models-cpu
    strategy:
      matrix: ${{ fromJSON(needs.gather-models-cpu.outputs.models) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    env:
      TORCHCHAT_ROOT: ${{ github.workspace }}
      REPO_NAME: ${{ matrix.repo_name }}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10.11'
      - name: Print machine info
        run: |
          echo "$(uname -a)"
      - name: Install dependencies
        run: |
          ./install/install_requirements.sh
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
      - name: Download checkpoints
        run: |
          bash ${TORCHCHAT_ROOT}/.ci/scripts/wget_checkpoint.sh ${{ matrix.repo_name }} "${{ matrix.resources }}"
      - name: Run validation
        run: |
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
          pushd ${TORCHCHAT_ROOT}
          bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
          bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "eval_sanity_check-float16"
  test-cpu-eval-sanity-check-float32:
    name: test-cpu-eval-sanity-check-float32 (${{ matrix.platform }}, ${{ matrix.model_name }})
    needs: gather-models-cpu
    strategy:
      matrix: ${{ fromJSON(needs.gather-models-cpu.outputs.models) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    env:
      TORCHCHAT_ROOT: ${{ github.workspace }}
      REPO_NAME: ${{ matrix.repo_name }}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10.11'
      - name: Print machine info
        run: |
          echo "$(uname -a)"
      - name: Install dependencies
        run: |
          ./install/install_requirements.sh
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
      - name: Download checkpoints
        run: |
          bash ${TORCHCHAT_ROOT}/.ci/scripts/wget_checkpoint.sh ${{ matrix.repo_name }} "${{ matrix.resources }}"
      - name: Run validation
        run: |
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
          pushd ${TORCHCHAT_ROOT}
          bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
          bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "eval_sanity_check-float32"
  gather-models-gpu:
    runs-on: ubuntu-22.04
    outputs:
      models: ${{ steps.gather-models-gpu.outputs.models }}
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: 'false'
      - uses: actions/setup-python@v4
        with:
          python-version: '3.10.11'
      - name: Extract the list of models to run on GPU
        id: gather-models-gpu
        run: |
          set -eux
          PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --event "pull_request" --backend "gpu"
  test-gpu-compile:
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    name: test-gpu-compile (${{ matrix.platform }}, ${{ matrix.model_name }})
    needs: gather-models-gpu
    strategy:
      matrix: ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
      fail-fast: false
    with:
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      gpu-arch-version: "12.1"
      script: |
        echo "::group::Print machine info"
        nvidia-smi
        echo "::endgroup::"
        echo "::group::Install required packages"
        ./install/install_requirements.sh cuda
        pip3 list
        python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
        echo "::endgroup::"
        echo "::group::Download checkpoint"
        export REPO_NAME=${{ matrix.repo_name }}
        bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
        echo "::endgroup::"
        echo "::group::Convert checkpoint"
        bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
        echo "::endgroup::"
        echo "::group::Run inference"
        bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "compile"
        echo "::endgroup::"
  test-gpu-aoti-bfloat16:
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    name: test-gpu-aoti-bfloat16 (${{ matrix.platform }}, ${{ matrix.model_name }})
    needs: gather-models-gpu
    strategy:
      matrix: ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
      fail-fast: false
    with:
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      gpu-arch-version: "12.1"
      timeout: 60
      script: |
        echo "::group::Print machine info"
        nvidia-smi
        echo "::endgroup::"
        echo "::group::Install newer objcopy that supports --set-section-alignment"
        yum install -y devtoolset-10-binutils
        export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
        echo "::endgroup::"
        echo "::group::Install required packages"
        ./install/install_requirements.sh cuda
        pip3 list
        python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
        echo "::endgroup::"
        echo "::group::Download checkpoint"
        export REPO_NAME=${{ matrix.repo_name }}
        bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
        echo "::endgroup::"
        echo "::group::Convert checkpoint"
        bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
        echo "::endgroup::"
        echo "::group::Run inference"
        bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-bfloat16"
        echo "::endgroup::"
  test-gpu-aoti-float32:
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    name: test-gpu-aoti-float32 (${{ matrix.platform }}, ${{ matrix.model_name }})
    needs: gather-models-gpu
    strategy:
      matrix: ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
      fail-fast: false
    with:
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      gpu-arch-version: "12.1"
      script: |
        echo "::group::Print machine info"
        nvidia-smi
        echo "::endgroup::"
        echo "::group::Install newer objcopy that supports --set-section-alignment"
        yum install -y devtoolset-10-binutils
        export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
        echo "::endgroup::"
        echo "::group::Install required packages"
        ./install/install_requirements.sh cuda
        pip3 list
        python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
        echo "::endgroup::"
        echo "::group::Download checkpoint"
        export REPO_NAME=${{ matrix.repo_name }}
        bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
        echo "::endgroup::"
        echo "::group::Convert checkpoint"
        bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
        echo "::endgroup::"
        echo "::group::Run inference"
        bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-float32"
        echo "::endgroup::"
        echo "::group::Run inference with quantize file"
        if [ $(uname -s) != Darwin ]; then
          python3 torchchat.py generate --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
        fi
        echo "::endgroup::"
  test-gpu-aoti-float16:
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    name: test-gpu-aoti-float16 (${{ matrix.platform }}, ${{ matrix.model_name }})
    needs: gather-models-gpu
    strategy:
      matrix: ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
      fail-fast: false
    with:
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      gpu-arch-version: "12.1"
      script: |
        echo "::group::Print machine info"
        nvidia-smi
        echo "::endgroup::"
        echo "::group::Install newer objcopy that supports --set-section-alignment"
        yum install -y devtoolset-10-binutils
        export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
        echo "::endgroup::"
        echo "::group::Install required packages"
        ./install/install_requirements.sh cuda
        pip3 list
        python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
        echo "::endgroup::"
        echo "::group::Download checkpoint"
        export REPO_NAME=${{ matrix.repo_name }}
        bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
        echo "::endgroup::"
        echo "::group::Convert checkpoint"
        bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
        echo "::endgroup::"
        echo "::group::Run inference"
        bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-float16"
        echo "::endgroup::"
echo "::group::Run inference with quantize file"
if [ $(uname -s) == Darwin ]; then
python3 torchchat.py export --output-dso-path /tmp/model.so --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
python3 torchchat.py generate --dso-path /tmp/model.so --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~
fi
echo "::endgroup::"
  test-gpu-eval-sanity-check:
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    name: test-gpu-eval-sanity-check (${{ matrix.platform }}, ${{ matrix.model_name }})
    needs: gather-models-gpu
    strategy:
      matrix: ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
      fail-fast: false
    with:
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      gpu-arch-version: "12.1"
      script: |
        echo "::group::Print machine info"
        nvidia-smi
        echo "::endgroup::"
        echo "::group::Install newer objcopy that supports --set-section-alignment"
        yum install -y devtoolset-10-binutils
        export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
        echo "::endgroup::"
        echo "::group::Install required packages"
        ./install/install_requirements.sh cuda
        pip3 list
        python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
        echo "::endgroup::"
        echo "::group::Download checkpoint"
        export REPO_NAME=${{ matrix.repo_name }}
        bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
        echo "::endgroup::"
        echo "::group::Convert checkpoint"
        bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
        echo "::endgroup::"
        echo "::group::Run eval"
        bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "eval_sanity_check"
        echo "::endgroup::"
  test-tinystories-executorch:
    strategy:
      matrix:
        runner: [16-core-ubuntu, macos-14-xlarge]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v2
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.10.11'
      - name: Setup Xcode
        if: runner.os == 'macOS'
        uses: maxim-lobanov/setup-xcode@v1
        with:
          xcode-version: '15.3'
      - name: Print machine info
        run: |
          uname -a
          if [ $(uname -s) == Darwin ]; then
            sysctl machdep.cpu.brand_string
            sysctl machdep.cpu.core_count
          fi
      - name: Install requirements
        run: |
          echo "Installing pip3 packages"
          ./install/install_requirements.sh
          export TORCHCHAT_ROOT=$PWD
          ./torchchat/utils/scripts/install_et.sh
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
          python3 -c 'import torchvision;print(f"torchvision: {torchvision.__version__, torchvision.version.git_version}")'
          cd ../..
          echo "Inside: ${PWD}"
      - name: Download checkpoints
        run: |
          mkdir -p checkpoints/stories15M
          pushd checkpoints/stories15M
          wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
          wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
          popd
          mkdir gguf_files
          export GGUF_PATH=gguf_files/TinyLlama-1.1B-openorca.Q4_0.gguf
          export GGUF_TOKENIZER_PATH=gguf_files/tokenizer.model
          wget -O ${GGUF_PATH} "https://huggingface.co/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/resolve/main/tinyllama-1.1b-1t-openorca.Q4_0.gguf?download=true"
          wget -O ${GGUF_TOKENIZER_PATH} https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
      - name: Run inference
        run: |
          export MODEL_PATH=${PWD}/checkpoints/stories15M/stories15M.pt
          export MODEL_NAME=stories15M
          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --device cpu
          python torchchat.py export --checkpoint-path ${MODEL_PATH} --output-pte-path ${PWD}/${MODEL_NAME}.pte
          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${PWD}/${MODEL_NAME}.pte
          echo "Tests complete."
      - name: Run inference
        run: |
          export MODEL_PATH=checkpoints/stories15M/stories15M.pt
          export MODEL_NAME=stories15M
          export MODEL_DIR=/tmp
          echo "******************************************"
          echo "*** vanilla ***"
          echo "******************************************"
          python torchchat.py export --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          echo "******************************************"
          echo "*** --quantize torchchat/quant_config/mobile.json ***"
          echo "******************************************"
          # python torchchat.py export --quantize torchchat/quant_config/mobile.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          # python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          echo "******************************************"
          echo "******* Emb: channel-wise quantized ******"
          echo "******************************************"
          python torchchat.py export --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          echo "******************************************"
          echo "******** Emb: group-wise quantized *******"
          echo "******************************************"
          python torchchat.py export --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          echo "******************************************"
          echo "**** Emb 4bit: channel-wise quantized ****"
          echo "******************************************"
          python torchchat.py export --quant '{"embedding" : {"bitwidth": 4, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          echo "******************************************"
          echo "****** Emb 4bit: group-wise quantized ****"
          echo "******************************************"
          python torchchat.py export --quant '{"embedding" : {"bitwidth": 4, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          echo "******************************************"
          echo "******* INT8 channel-wise quantized ******"
          echo "******************************************"
          python torchchat.py export --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          echo "******************************************"
          echo "******** INT8 group-wise quantized *******"
          echo "******************************************"
          python torchchat.py export --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          echo "******************************************"
          echo "******** ET: a8w4dq INT4 group-wise quantized *******"
          echo "******************************************"
          python torchchat.py export --quant '{"linear:a8w4dq" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          echo "******************************************"
          echo "******** INT4 group-wise quantized *******"
          echo "******************************************"
          # python torchchat.py export --quant '{"linear:int4" : {"groupsize": 32}}' --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          # python3 torchchat.py generate --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          echo "tests complete"
          echo "******************************************"
      - name: Run GGUF export + inference
        run: |
          export GGUF_PATH=gguf_files/TinyLlama-1.1B-openorca.Q4_0.gguf
          export GGUF_TOKENIZER_PATH=gguf_files/tokenizer.model
          # MODEL_NAME from earlier steps does not persist across steps; set it here.
          export MODEL_NAME=TinyLlama-1.1B-openorca.Q4_0
          python torchchat.py export --gguf-path ${GGUF_PATH} --output-pte-path ${PWD}/${MODEL_NAME}.pte
          python torchchat.py generate --gguf-path ${GGUF_PATH} --pte-path ${PWD}/${MODEL_NAME}.pte --tokenizer-path ${GGUF_TOKENIZER_PATH} --temperature 0 --max-new-tokens 20
          echo "Tests complete."
  torchchat-command-load-test:
    strategy:
      matrix:
        runner: [macos-14]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v2
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.10.11'
      - name: Print machine info
        run: |
          uname -a
          if [ $(uname -s) == Darwin ]; then
            sysctl machdep.cpu.brand_string
            sysctl machdep.cpu.core_count
          fi
      - name: Install requirements
        run: |
          echo "Installing pip3 packages"
          ./install/install_requirements.sh
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
      - name: Download Stories files
        run: |
          mkdir -p checkpoints/stories15M
          pushd checkpoints/stories15M
          curl -fsSL -O https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
          curl -fsSL -O https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
          popd
      - name: Test generate
        run: |
          export MODEL_PATH=checkpoints/stories15M/stories15M.pt
          export MODEL_NAME=stories15M
          export MODEL_DIR=/tmp
          python3 torchchat.py generate --device cpu --checkpoint-path ${MODEL_PATH} --temperature 0
          python torchchat.py generate --device cpu --checkpoint-path ${MODEL_PATH} --temperature 0
          echo "Tests complete."
      - name: Test download
        run: |
          python torchchat.py list
          python torchchat.py download stories15m
          python torchchat.py generate stories15M --device cpu
          python torchchat.py remove stories15m
  test-mps:
    strategy:
      matrix:
        runner: [macos-m1-stable]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v2
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: 3.10.11
      - name: Print machine info
        run: |
          uname -a
          if [ $(uname -s) == Darwin ]; then
            sysctl machdep.cpu.brand_string
            sysctl machdep.cpu.core_count
          fi
      - name: Run test
        run: |
          export PYTHON_VERSION="3.10"
          set -x
          # NS/MC: Remove previous installation of torch and torchao first
          # as this script does not install anything into conda env but rather as system dep
          pip3 uninstall -y torch || true
          set -eou pipefail
          pip3 uninstall -y torchao || true
          set -eou pipefail
          echo "::group::Print machine info"
          uname -a
          sysctl machdep.cpu.brand_string
          sysctl machdep.cpu.core_count
          echo "::endgroup::"
          echo "::group::Install requirements"
          # Install requirements
          ./install/install_requirements.sh
          ls -la
          pwd
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
          echo "::endgroup::"
          echo "::group::Download checkpoints"
          (
            mkdir -p checkpoints/stories15M
            pushd checkpoints/stories15M
            curl -fsSL -O https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
            curl -fsSL -O https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
            popd
          )
          echo "::endgroup::"
          echo "::group::Run inference"
          export MODEL_PATH=checkpoints/stories15M/stories15M.pt
          export MODEL_NAME=stories15M
          export MODEL_DIR=/tmp
          python3 torchchat.py generate --device mps --checkpoint-path ${MODEL_PATH} --temperature 0
          echo "************************************************************"
          echo "*** embedding"
          echo "************************************************************"
          python3 torchchat.py generate --device mps --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0
          python3 torchchat.py generate --device mps --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0
          echo "************************************************************"
          echo "*** linear int8"
          echo "************************************************************"
          python3 torchchat.py generate --device mps --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0
          python3 torchchat.py generate --device mps --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0
          echo "************************************************************"
          echo "*** linear int4"
          echo "************************************************************"
          PYTORCH_ENABLE_MPS_FALLBACK=1 python3 torchchat.py generate --device mps --quant '{"linear:int4" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0
  test-gguf-util:
    strategy:
      matrix:
        runner: [macos-14]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v2
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: 3.10.11
      - name: Print machine info
        run: |
          uname -a
          if [ $(uname -s) == Darwin ]; then
            sysctl machdep.cpu.brand_string
            sysctl machdep.cpu.core_count
          fi
      - name: Install requirements
        run: |
          echo "Installing pip3 packages"
          pip3 install gguf
          ./install/install_requirements.sh
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
          git clone https://github.com/ggerganov/llama.cpp.git
          pushd llama.cpp
          make
          popd
      - name: Download GGUF files
        run: |
          mkdir gguf_files
          wget -O gguf_files/TinyLlama-1.1B-openorca.Q4_0.gguf "https://huggingface.co/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/resolve/main/tinyllama-1.1b-1t-openorca.Q4_0.gguf?download=true"
          ./llama.cpp/llama-quantize --allow-requantize gguf_files/TinyLlama-1.1B-openorca.Q4_0.gguf gguf_files/TinyLlama-1.1B-openorca.Q4_0.requant_F32.gguf F32
      - name: Load files
        run: |
          touch test.py
          echo "from torchchat.utils.gguf_loader import test_by_to_float" >> test.py
          echo "test_by_to_float(\"gguf_files/TinyLlama-1.1B-openorca.Q4_0.gguf\", \"gguf_files/TinyLlama-1.1B-openorca.Q4_0.requant_F32.gguf\")" >> test.py
          cat test.py
          python test.py
          echo "Tests complete."
  test-mps-dtype:
    strategy:
      matrix:
        runner: [macos-m1-stable]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v2
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: 3.10.11
      - name: Print machine info
        run: |
          uname -a
          if [ $(uname -s) == Darwin ]; then
            sysctl machdep.cpu.brand_string
            sysctl machdep.cpu.core_count
          fi
      - name: Run test
        run: |
          export PYTHON_VERSION="3.10"
          set -x
          # NS/MC: Remove previous installation of torch and torchao first
          # as this script does not install anything into conda env but rather as system dep
          pip3 uninstall -y torch || true
          set -eou pipefail
          pip3 uninstall -y torchao || true
          set -eou pipefail
          echo "::group::Print machine info"
          uname -a
          sysctl machdep.cpu.brand_string
          sysctl machdep.cpu.core_count
          echo "::endgroup::"
          echo "::group::Install requirements"
          # Install requirements
          ./install/install_requirements.sh
          ls -la
          pwd
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
          echo "::endgroup::"
          echo "::group::Download checkpoints"
          (
            mkdir -p checkpoints/stories15M
            pushd checkpoints/stories15M
            curl -fsSL -O https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
            curl -fsSL -O https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
            popd
          )
          echo "::endgroup::"
          echo "::group::Run inference"
          export MODEL_PATH=checkpoints/stories15M/stories15M.pt
          export MODEL_NAME=stories15M
          export MODEL_DIR=/tmp
          for DTYPE in float16 float32; do
            # if [ $(uname -s) == Darwin ]; then
            #   export DTYPE=float16
            # fi
            python3 torchchat.py generate --dtype ${DTYPE} --device mps --checkpoint-path ${MODEL_PATH} --temperature 0
            python3 torchchat.py generate --dtype ${DTYPE} --device mps --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0
            python3 torchchat.py generate --dtype ${DTYPE} --device mps --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0
            python3 torchchat.py generate --dtype ${DTYPE} --device mps --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0
            python3 torchchat.py generate --dtype ${DTYPE} --device mps --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0
            PYTORCH_ENABLE_MPS_FALLBACK=1 python3 torchchat.py generate --dtype ${DTYPE} --device mps --quant '{"linear:int4" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0
          done
  compile-gguf:
    strategy:
      matrix:
        runner: [macos-14]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v2
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: 3.10.11
      - name: Print machine info
        run: |
          uname -a
          if [ $(uname -s) == Darwin ]; then
            sysctl machdep.cpu.brand_string
            sysctl machdep.cpu.core_count
          fi
      - name: Install requirements
        run: |
          ./install/install_requirements.sh
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
      - name: Download GGUF
        run: |
          mkdir gguf_files
          export GGUF_PATH=gguf_files/TinyLlama-1.1B-openorca.Q4_0.gguf
          export TOKENIZER_PATH=gguf_files/tokenizer.model
          wget -O ${GGUF_PATH} "https://huggingface.co/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/resolve/main/tinyllama-1.1b-1t-openorca.Q4_0.gguf?download=true"
          wget -O ${TOKENIZER_PATH} https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
      - name: Run inference
        run: |
          export GGUF_PATH=gguf_files/TinyLlama-1.1B-openorca.Q4_0.gguf
          export TOKENIZER_PATH=gguf_files/tokenizer.model
          export MODEL_NAME=TinyLlama-1.1B-openorca.Q4_0.gguf
          export MODEL_DIR=/tmp
          echo "******************************************"
          echo "******* Embed: not quantized *************"
          echo "******************************************"
          echo "Running eager"
          python3 torchchat.py generate --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --device cpu
          echo "Running compiled"
          python3 torchchat.py generate --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile --device cpu
          echo "******************************************"
          echo "******* Emb: channel-wise quantized ******"
          echo "******************************************"
          echo "Running eager"
          python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --device cpu
          echo "Running compiled"
          python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile --device cpu
          echo "******************************************"
          echo "******** Emb: group-wise quantized *******"
          echo "******************************************"
          echo "Running eager"
          python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --device cpu
          echo "Running compiled"
          python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile --device cpu
          echo "tests complete"
          echo "******************************************"
  runner-et:
    strategy:
      matrix:
        runner: [16-core-ubuntu, macos-14-xlarge]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
        with:
          submodules: true
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: 3.10.11
      - name: Setup Xcode
        if: runner.os == 'macOS'
        uses: maxim-lobanov/setup-xcode@v1
        with:
          xcode-version: '15.3'
      - name: Print machine info
        run: |
          uname -a
          if [ $(uname -s) == Darwin ]; then
            sysctl machdep.cpu.brand_string
            sysctl machdep.cpu.core_count
          fi
      - name: Install torchchat
        run: |
          echo "Installing pip3 packages"
          ./install/install_requirements.sh
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
      - name: Set ET git sha
        id: setup-hash
        run: |
          export TORCHCHAT_ROOT=${PWD}
          echo "et-git-hash=$(cat ${TORCHCHAT_ROOT}/install/.pins/et-pin.txt)" >> "$GITHUB_ENV"
      - name: Load or install ET
        id: install-et
        uses: actions/cache@v4
        with:
          path: |
            ./et-build
            ./torchchat/utils/scripts
          key: et-build-${{runner.os}}-${{runner.arch}}-${{env.et-git-hash}}-${{ hashFiles('**/install_et.sh') }}
      - if: ${{ steps.install-et.outputs.cache-hit != 'true' }}
        continue-on-error: true
        run: |
          echo "Installing ExecuTorch"
          bash torchchat/utils/scripts/install_et.sh
      - name: Install ExecuTorch python
        run: |
          echo "Install ExecuTorch python"
          export TORCHCHAT_ROOT=$PWD
          export ET_BUILD_DIR="et-build"
          ENABLE_ET_PYBIND="${1:-true}"
          source "torchchat/utils/scripts/install_utils.sh"
          install_executorch_python_libs $ENABLE_ET_PYBIND
      - name: Install runner
        run: |
          echo "Installing runner"
          bash torchchat/utils/scripts/build_native.sh et
      - name: Run inference
        run: |
          python torchchat.py download stories15M
          wget -O ./tokenizer.model https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
          export PRMT="Once upon a time in a land far away"
          python torchchat.py generate stories15M --temperature 0 --prompt "${PRMT}" --device cpu
          python torchchat.py export stories15M --output-pte-path ./model.pte
          ./cmake-out/et_run ./model.pte -z ./tokenizer.model -t 0 -i "${PRMT}"
          for dtype in fp32 fp16 bf16; do
            echo "Testing export + runner with dtype=$dtype"
            python torchchat.py export stories15M --dtype $dtype --output-pte-path ./model.pte
            ./cmake-out/et_run ./model.pte -z ./tokenizer.model -t 0 -i "${PRMT}"
          done
          echo "Tests complete."
  runner-aoti:
    strategy:
      matrix:
        runner: [16-core-ubuntu, macos-14-xlarge]
    runs-on: ${{matrix.runner}}
    env:
      TORCHCHAT_ROOT: ${{ github.workspace }}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
        with:
          submodules: true
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10.11'
      - name: Print machine info
        run: |
          echo "$(uname -a)"
      - name: Install dependencies
        run: |
          ./install/install_requirements.sh
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
          bash torchchat/utils/scripts/build_native.sh aoti
      - name: Download checkpoint
        run: |
          mkdir -p checkpoints/stories15M
          pushd checkpoints/stories15M
          wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
          wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
          popd
      - name: Run inference
        run: |
          set -eou pipefail
          export MODEL_DIR=${PWD}/checkpoints/stories15M
          export PROMPT="Once upon a time in a land far away"
          python torchchat.py generate --checkpoint-path ${MODEL_DIR}/stories15M.pt --temperature 0 --prompt "${PROMPT}" --device cpu
          for dtype in fp32 fp16 bf16 fast fast16; do
            echo "Running export + runner with dtype=$dtype"
            python torchchat.py export --checkpoint-path ${MODEL_DIR}/stories15M.pt --dtype $dtype --output-dso-path /tmp/model.so
            ./cmake-out/aoti_run /tmp/model.so -z ${MODEL_DIR}/tokenizer.model -i "${PROMPT}"
          done
          echo "Tests complete."
  test-build-runner-et-android:
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    with:
      runner: linux.4xlarge
      script: |
        uname -a
        if [ $(uname -s) == Darwin ]; then
          sysctl machdep.cpu.brand_string
          sysctl machdep.cpu.core_count
        fi
        ./install/install_requirements.sh
        pip3 list
        python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
        export TORCHCHAT_ROOT=${PWD}
        pushd /tmp
        wget https://dl.google.com/android/repository/android-ndk-r26c-linux.zip
        unzip android-ndk-r26c-linux.zip
        popd
        export ANDROID_NDK=/tmp/android-ndk-r26c
        # Pull submodules (re2, abseil) for Tiktoken
        git submodule sync
        git submodule update --init
        ./runner/build_android.sh
        echo "Tests complete."
  test-torchao-experimental:
    strategy:
      matrix:
        runner: [macos-14-xlarge]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
        with:
          submodules: true
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: 3.10.11
      - name: Setup Xcode
        if: runner.os == 'macOS'
        uses: maxim-lobanov/setup-xcode@v1
        with:
          xcode-version: '15.3'
      - name: Print machine info
        run: |
          uname -a
          if [ $(uname -s) == Darwin ]; then
            sysctl machdep.cpu.brand_string
            sysctl machdep.cpu.core_count
          fi
      - name: Install torchchat
        run: |
          echo "Installing pip3 packages"
          ./install/install_requirements.sh
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
      - name: Install torchao-ops
        id: install-torchao-ops
        run: |
          bash torchchat/utils/scripts/build_torchao_ops.sh
      - name: Set git shas
        id: setup-hash
        run: |
          export TORCHCHAT_ROOT=${PWD}
          echo "et-git-hash=$(cat ${TORCHCHAT_ROOT}/install/.pins/et-pin.txt)" >> "$GITHUB_ENV"
      - name: Load or install ET
        id: install-et
        uses: actions/cache@v4
        with:
          path: |
            ./et-build
            ./torchchat/utils/scripts/install_et.sh
          key: et-build-${{runner.os}}-${{runner.arch}}-${{env.et-git-hash}}-${{ hashFiles('**/install_et.sh') }}
      - if: ${{ steps.install-et.outputs.cache-hit != 'true' }}
        continue-on-error: true
        run: |
          echo "Installing ExecuTorch"
          bash torchchat/utils/scripts/install_et.sh
      - name: Install ExecuTorch python
        run: |
          echo "Install ExecuTorch python"
          export TORCHCHAT_ROOT=$PWD
          export ET_BUILD_DIR="et-build"
          ENABLE_ET_PYBIND="${1:-true}"
          source "torchchat/utils/scripts/install_utils.sh"
          install_executorch_python_libs $ENABLE_ET_PYBIND
      - name: Install runner
        run: |
          echo "Installing runner"
          bash torchchat/utils/scripts/build_native.sh et link_torchao_ops
      - name: Install runner AOTI
        id: install-runner-aoti
        run: |
          bash torchchat/utils/scripts/build_native.sh aoti link_torchao_ops
      - name: Run inference
        run: |
          python torchchat.py download stories110M
          wget -O ./tokenizer.model https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
          export PRMT="Once upon a time in a land far away"
          echo "Generate eager"
          python torchchat.py generate stories110M --temperature 0 --prompt "${PRMT}" --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}'
          echo "Generate compile"
          python torchchat.py generate stories110M --temperature 0 --prompt "${PRMT}" --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --compile
          echo "Export and run ET (C++ runner)"
          python torchchat.py export stories110M --output-pte-path ./model.pte --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}'
          ./cmake-out/et_run ./model.pte -z ./tokenizer.model -t 0 -i "${PRMT}"
          echo "Export and run AOTI (C++ runner)"
          python torchchat.py export stories110M --output-dso-path ./model.so --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}'
          ./cmake-out/aoti_run ./model.so -z ./tokenizer.model -t 0 -i "${PRMT}"
          echo "Generate AOTI"
          python torchchat.py generate stories110M --dso-path ./model.so --prompt "${PRMT}"
          echo "Tests complete."