Add check for rocminfo in the installation script to replace the nightly URL with the appropriate ROCm whl. #3087
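
The change described in the title amounts to probing for the `rocminfo` binary before choosing which PyTorch nightly wheel index the install script points at. The sketch below is only an illustration of that idea; the variable name and the exact wheel-index URLs are assumptions, not lines taken from this PR's actual install_requirements.sh diff:

    # Illustrative sketch, not the PR's actual diff: pick the nightly wheel
    # index based on whether an AMD ROCm stack is present on the machine.
    if command -v rocminfo &> /dev/null; then
      # rocminfo is installed, so fetch ROCm builds of the nightly wheels
      # (the rocm6.2 suffix is an assumed example version).
      TORCH_NIGHTLY_URL="https://download.pytorch.org/whl/nightly/rocm6.2"
    else
      # No ROCm stack detected: keep the default nightly URL.
      TORCH_NIGHTLY_URL="https://download.pytorch.org/whl/nightly/cu121"
    fi
    pip3 install --pre torch --index-url "${TORCH_NIGHTLY_URL}"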

Workflow file for this run

name: pull
on:
  pull_request:
  push:
    branches:
      - main
  workflow_dispatch:
jobs:
  gather-models-cpu:
    runs-on: ubuntu-22.04
    outputs:
      models: ${{ steps.gather-models-cpu.outputs.models }}
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: 'false'
      - uses: actions/setup-python@v4
        with:
          python-version: '3.10.11'
      - name: Extract the list of models to run on CPU
        id: gather-models-cpu
        run: |
          set -eux
          PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --event "pull_request" --backend "cpu"
  test-cpu-compile:
    name: test-cpu-compile (${{ matrix.platform }}, ${{ matrix.model_name }})
    needs: gather-models-cpu
    strategy:
      matrix: ${{ fromJSON(needs.gather-models-cpu.outputs.models) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    env:
      TORCHCHAT_ROOT: ${{ github.workspace }}
      REPO_NAME: ${{ matrix.repo_name }}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10.11'
      - name: Print machine info
        run: |
          echo "$(uname -a)"
      - name: Install dependencies
        run: |
          ./install/install_requirements.sh
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
      - name: Download checkpoints
        run: |
          bash ${TORCHCHAT_ROOT}/.ci/scripts/wget_checkpoint.sh ${{ matrix.repo_name }} "${{ matrix.resources }}"
      - name: Run validation
        run: |
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
          pushd ${TORCHCHAT_ROOT}
          bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
          bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "compile"
  test-cpu-aoti:
    name: test-cpu-aoti (${{ matrix.platform }}, ${{ matrix.model_name }})
    needs: gather-models-cpu
    strategy:
      matrix: ${{ fromJSON(needs.gather-models-cpu.outputs.models) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    env:
      TORCHCHAT_ROOT: ${{ github.workspace }}
      REPO_NAME: ${{ matrix.repo_name }}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10.11'
      - name: Print machine info
        run: |
          echo "$(uname -a)"
      - name: Install dependencies
        run: |
          ./install/install_requirements.sh
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
      - name: Download checkpoints
        run: |
          bash ${TORCHCHAT_ROOT}/.ci/scripts/wget_checkpoint.sh ${{ matrix.repo_name }} "${{ matrix.resources }}"
      - name: Run validation
        run: |
          pushd ${TORCHCHAT_ROOT}
          bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
          bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "aoti"
  test-cpu-eval-sanity-check:
    name: test-cpu-eval-sanity-check (${{ matrix.platform }}, ${{ matrix.model_name }})
    needs: gather-models-cpu
    strategy:
      matrix: ${{ fromJSON(needs.gather-models-cpu.outputs.models) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    env:
      TORCHCHAT_ROOT: ${{ github.workspace }}
      REPO_NAME: ${{ matrix.repo_name }}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10.11'
      - name: Print machine info
        run: |
          echo "$(uname -a)"
      - name: Install dependencies
        run: |
          ./install/install_requirements.sh
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
      - name: Download checkpoints
        run: |
          bash ${TORCHCHAT_ROOT}/.ci/scripts/wget_checkpoint.sh ${{ matrix.repo_name }} "${{ matrix.resources }}"
      - name: Run validation
        run: |
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
          pushd ${TORCHCHAT_ROOT}
          bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
          bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "eval_sanity_check-bfloat16"
  test-cpu-eval-sanity-check-float16:
    name: test-cpu-eval-sanity-check-float16 (${{ matrix.platform }}, ${{ matrix.model_name }})
    needs: gather-models-cpu
    strategy:
      matrix: ${{ fromJSON(needs.gather-models-cpu.outputs.models) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    env:
      TORCHCHAT_ROOT: ${{ github.workspace }}
      REPO_NAME: ${{ matrix.repo_name }}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10.11'
      - name: Print machine info
        run: |
          echo "$(uname -a)"
      - name: Install dependencies
        run: |
          ./install/install_requirements.sh
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
      - name: Download checkpoints
        run: |
          bash ${TORCHCHAT_ROOT}/.ci/scripts/wget_checkpoint.sh ${{ matrix.repo_name }} "${{ matrix.resources }}"
      - name: Run validation
        run: |
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
          pushd ${TORCHCHAT_ROOT}
          bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
          bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "eval_sanity_check-float16"
  test-cpu-eval-sanity-check-float32:
    name: test-cpu-eval-sanity-check-float32 (${{ matrix.platform }}, ${{ matrix.model_name }})
    needs: gather-models-cpu
    strategy:
      matrix: ${{ fromJSON(needs.gather-models-cpu.outputs.models) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    env:
      TORCHCHAT_ROOT: ${{ github.workspace }}
      REPO_NAME: ${{ matrix.repo_name }}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10.11'
      - name: Print machine info
        run: |
          echo "$(uname -a)"
      - name: Install dependencies
        run: |
          ./install/install_requirements.sh
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
      - name: Download checkpoints
        run: |
          bash ${TORCHCHAT_ROOT}/.ci/scripts/wget_checkpoint.sh ${{ matrix.repo_name }} "${{ matrix.resources }}"
      - name: Run validation
        run: |
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
          pushd ${TORCHCHAT_ROOT}
          bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
          bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "eval_sanity_check-float32"
  gather-models-gpu:
    runs-on: ubuntu-22.04
    outputs:
      models: ${{ steps.gather-models-gpu.outputs.models }}
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: 'false'
      - uses: actions/setup-python@v4
        with:
          python-version: '3.10.11'
      - name: Extract the list of models to run on GPU
        id: gather-models-gpu
        run: |
          set -eux
          PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --event "pull_request" --backend "gpu"
  test-gpu-compile:
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    name: test-gpu-compile (${{ matrix.platform }}, ${{ matrix.model_name }})
    needs: gather-models-gpu
    strategy:
      matrix: ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
      fail-fast: false
    with:
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      gpu-arch-version: "12.1"
      script: |
        echo "::group::Print machine info"
        nvidia-smi
        echo "::endgroup::"
        echo "::group::Install required packages"
        ./install/install_requirements.sh cuda
        pip3 list
        python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
        echo "::endgroup::"
        echo "::group::Download checkpoint"
        export REPO_NAME=${{ matrix.repo_name }}
        bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
        echo "::endgroup::"
        echo "::group::Convert checkpoint"
        bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
        echo "::endgroup::"
        echo "::group::Run inference"
        bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "compile"
        echo "::endgroup::"
  test-gpu-aoti-bfloat16:
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    name: test-gpu-aoti-bfloat16 (${{ matrix.platform }}, ${{ matrix.model_name }})
    needs: gather-models-gpu
    strategy:
      matrix: ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
      fail-fast: false
    with:
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      gpu-arch-version: "12.1"
      timeout: 60
      script: |
        echo "::group::Print machine info"
        nvidia-smi
        echo "::endgroup::"
        echo "::group::Install newer objcopy that supports --set-section-alignment"
        yum install -y devtoolset-10-binutils
        export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
        echo "::endgroup::"
        echo "::group::Install required packages"
        ./install/install_requirements.sh cuda
        pip3 list
        python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
        echo "::endgroup::"
        echo "::group::Download checkpoint"
        export REPO_NAME=${{ matrix.repo_name }}
        bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
        echo "::endgroup::"
        echo "::group::Convert checkpoint"
        bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
        echo "::endgroup::"
        echo "::group::Run inference"
        bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-bfloat16"
        echo "::endgroup::"
  test-gpu-aoti-float32:
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    name: test-gpu-aoti-float32 (${{ matrix.platform }}, ${{ matrix.model_name }})
    needs: gather-models-gpu
    strategy:
      matrix: ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
      fail-fast: false
    with:
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      gpu-arch-version: "12.1"
      script: |
        echo "::group::Print machine info"
        nvidia-smi
        echo "::endgroup::"
        echo "::group::Install newer objcopy that supports --set-section-alignment"
        yum install -y devtoolset-10-binutils
        export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
        echo "::endgroup::"
        echo "::group::Install required packages"
        ./install/install_requirements.sh cuda
        pip3 list
        python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
        echo "::endgroup::"
        echo "::group::Download checkpoint"
        export REPO_NAME=${{ matrix.repo_name }}
        bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
        echo "::endgroup::"
        echo "::group::Convert checkpoint"
        bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
        echo "::endgroup::"
        echo "::group::Run inference"
        bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-float32"
        echo "::endgroup::"
        echo "::group::Run inference with quantize file"
        if [ $(uname -s) != Darwin ]; then
          python3 torchchat.py generate --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
        fi
        echo "::endgroup::"
  test-gpu-aoti-float16:
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    name: test-gpu-aoti-float16 (${{ matrix.platform }}, ${{ matrix.model_name }})
    needs: gather-models-gpu
    strategy:
      matrix: ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
      fail-fast: false
    with:
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      gpu-arch-version: "12.1"
      script: |
        echo "::group::Print machine info"
        nvidia-smi
        echo "::endgroup::"
        echo "::group::Install newer objcopy that supports --set-section-alignment"
        yum install -y devtoolset-10-binutils
        export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
        echo "::endgroup::"
        echo "::group::Install required packages"
        ./install/install_requirements.sh cuda
        pip3 list
        python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
        echo "::endgroup::"
        echo "::group::Download checkpoint"
        export REPO_NAME=${{ matrix.repo_name }}
        bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
        echo "::endgroup::"
        echo "::group::Convert checkpoint"
        bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
        echo "::endgroup::"
        echo "::group::Run inference"
        bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-float16"
        echo "::endgroup::"
echo "::group::Run inference with quantize file"
if [ $(uname -s) == Darwin ]; then
python3 torchchat.py export --output-dso-path /tmp/model.so --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
python3 torchchat.py generate --dso-path /tmp/model.so --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~
fi
echo "::endgroup::"
  test-gpu-eval-sanity-check:
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    name: test-gpu-eval-sanity-check (${{ matrix.platform }}, ${{ matrix.model_name }})
    needs: gather-models-gpu
    strategy:
      matrix: ${{ fromJSON(needs.gather-models-gpu.outputs.models) }}
      fail-fast: false
    with:
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      gpu-arch-version: "12.1"
      script: |
        echo "::group::Print machine info"
        nvidia-smi
        echo "::endgroup::"
        echo "::group::Install newer objcopy that supports --set-section-alignment"
        yum install -y devtoolset-10-binutils
        export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
        echo "::endgroup::"
        echo "::group::Install required packages"
        ./install/install_requirements.sh cuda
        pip3 list
        python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
        echo "::endgroup::"
        echo "::group::Download checkpoint"
        export REPO_NAME=${{ matrix.repo_name }}
        bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
        echo "::endgroup::"
        echo "::group::Convert checkpoint"
        bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
        echo "::endgroup::"
        echo "::group::Run eval"
        bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "eval_sanity_check"
        echo "::endgroup::"
  test-tinystories-executorch:
    strategy:
      matrix:
        runner: [16-core-ubuntu, macos-14-xlarge]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v2
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.10.11'
      - name: Setup Xcode
        if: runner.os == 'macOS'
        uses: maxim-lobanov/setup-xcode@v1
        with:
          xcode-version: '15.3'
      - name: Print machine info
        run: |
          uname -a
          if [ $(uname -s) == Darwin ]; then
            sysctl machdep.cpu.brand_string
            sysctl machdep.cpu.core_count
          fi
      - name: Install requirements
        run: |
          echo "Installing pip3 packages"
          ./install/install_requirements.sh
          export TORCHCHAT_ROOT=$PWD
          ./torchchat/utils/scripts/install_et.sh
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
          python3 -c 'import torchvision;print(f"torchvision: {torchvision.__version__, torchvision.version.git_version}")'
          cd ../..
          echo "Inside: ${PWD}"
      - name: Download checkpoints
        run: |
          mkdir -p checkpoints/stories15M
          pushd checkpoints/stories15M
          wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
          wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
          popd
          mkdir gguf_files
          export GGUF_PATH=gguf_files/TinyLlama-1.1B-openorca.Q4_0.gguf
          export GGUF_TOKENIZER_PATH=gguf_files/tokenizer.model
          wget -O ${GGUF_PATH} "https://huggingface.co/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/resolve/main/tinyllama-1.1b-1t-openorca.Q4_0.gguf?download=true"
          wget -O ${GGUF_TOKENIZER_PATH} https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
      - name: Run inference
        run: |
          export MODEL_PATH=${PWD}/checkpoints/stories15M/stories15M.pt
          export MODEL_NAME=stories15M
          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --device cpu
          python torchchat.py export --checkpoint-path ${MODEL_PATH} --output-pte-path ${PWD}/${MODEL_NAME}.pte
          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${PWD}/${MODEL_NAME}.pte
          echo "Tests complete."
      - name: Run inference
        run: |
          export MODEL_PATH=checkpoints/stories15M/stories15M.pt
          export MODEL_NAME=stories15M
          export MODEL_DIR=/tmp
          echo "******************************************"
          echo "*** vanilla ***"
          echo "******************************************"
          python torchchat.py export --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          echo "******************************************"
          echo "*** --quantize torchchat/quant_config/mobile.json ***"
          echo "******************************************"
          # python torchchat.py export --quantize torchchat/quant_config/mobile.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          # python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          echo "******************************************"
          echo "******* Emb: channel-wise quantized ******"
          echo "******************************************"
          python torchchat.py export --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          echo "******************************************"
          echo "******** Emb: group-wise quantized *******"
          echo "******************************************"
          python torchchat.py export --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          echo "******************************************"
          echo "**** Emb 4bit: channel-wise quantized ****"
          echo "******************************************"
          python torchchat.py export --quant '{"embedding" : {"bitwidth": 4, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          echo "******************************************"
          echo "****** Emb 4bit: group-wise quantized ****"
          echo "******************************************"
          python torchchat.py export --quant '{"embedding" : {"bitwidth": 4, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          echo "******************************************"
          echo "******* INT8 channel-wise quantized ******"
          echo "******************************************"
          python torchchat.py export --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          echo "******************************************"
          echo "******** INT8 group-wise quantized *******"
          echo "******************************************"
          python torchchat.py export --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          echo "******************************************"
          echo "******** ET: a8w4dq INT4 group-wise quantized *******"
          echo "******************************************"
          python torchchat.py export --quant '{"linear:a8w4dq" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          echo "******************************************"
          echo "******** INT4 group-wise quantized *******"
          echo "******************************************"
          # python torchchat.py export --quant '{"linear:int4" : {"groupsize": 32}}' --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          # python3 torchchat.py generate --tokenizer-path ${TOKENIZER_PATH} --gguf-path ${GGUF_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
          echo "tests complete"
          echo "******************************************"
      - name: Run GGUF export + inference
        run: |
          export GGUF_PATH=gguf_files/TinyLlama-1.1B-openorca.Q4_0.gguf
          export GGUF_TOKENIZER_PATH=gguf_files/tokenizer.model
          # MODEL_NAME from earlier steps does not persist across steps; set it here.
          export MODEL_NAME=TinyLlama-1.1B-openorca.Q4_0
          python torchchat.py export --gguf-path ${GGUF_PATH} --output-pte-path ${PWD}/${MODEL_NAME}.pte
          python torchchat.py generate --gguf-path ${GGUF_PATH} --pte-path ${PWD}/${MODEL_NAME}.pte --tokenizer-path ${GGUF_TOKENIZER_PATH} --temperature 0 --max-new-tokens 20
          echo "Tests complete."
  torchchat-command-load-test:
    strategy:
      matrix:
        runner: [macos-14]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v2
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.10.11'
      - name: Print machine info
        run: |
          uname -a
          if [ $(uname -s) == Darwin ]; then
            sysctl machdep.cpu.brand_string
            sysctl machdep.cpu.core_count
          fi
      - name: Install requirements
        run: |
          echo "Installing pip3 packages"
          ./install/install_requirements.sh
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
      - name: Download Stories files
        run: |
          mkdir -p checkpoints/stories15M
          pushd checkpoints/stories15M
          curl -fsSL -O https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
          curl -fsSL -O https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
          popd
      - name: Test generate
        run: |
          export MODEL_PATH=checkpoints/stories15M/stories15M.pt
          export MODEL_NAME=stories15M
          export MODEL_DIR=/tmp
          python3 torchchat.py generate --device cpu --checkpoint-path ${MODEL_PATH} --temperature 0
          python torchchat.py generate --device cpu --checkpoint-path ${MODEL_PATH} --temperature 0
          echo "Tests complete."
      - name: Test download
        run: |
          python torchchat.py list
          python torchchat.py download stories15m
          python torchchat.py generate stories15M --device cpu
          python torchchat.py remove stories15m
  test-mps:
    strategy:
      matrix:
        runner: [macos-m1-stable]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v2
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: 3.10.11
      - name: Print machine info
        run: |
          uname -a
          if [ $(uname -s) == Darwin ]; then
            sysctl machdep.cpu.brand_string
            sysctl machdep.cpu.core_count
          fi
      - name: Run test
        run: |
          export PYTHON_VERSION="3.10"
          set -x
          # NS/MC: Remove previous installation of torch and torchao first
          # as this script does not install anything into conda env but rather as system dep
          pip3 uninstall -y torch || true
          set -eou pipefail
          pip3 uninstall -y torchao || true
          set -eou pipefail
          echo "::group::Print machine info"
          uname -a
          sysctl machdep.cpu.brand_string
          sysctl machdep.cpu.core_count
          echo "::endgroup::"
          echo "::group::Install requirements"
          # Install requirements
          ./install/install_requirements.sh
          ls -la
          pwd
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
          echo "::endgroup::"
          echo "::group::Download checkpoints"
          (
            mkdir -p checkpoints/stories15M
            pushd checkpoints/stories15M
            curl -fsSL -O https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
            curl -fsSL -O https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
            popd
          )
          echo "::endgroup::"
          echo "::group::Run inference"
          export MODEL_PATH=checkpoints/stories15M/stories15M.pt
          export MODEL_NAME=stories15M
          export MODEL_DIR=/tmp
          python3 torchchat.py generate --device mps --checkpoint-path ${MODEL_PATH} --temperature 0
          echo "************************************************************"
          echo "*** embedding"
          echo "************************************************************"
          python3 torchchat.py generate --device mps --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0
          python3 torchchat.py generate --device mps --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0
          echo "************************************************************"
          echo "*** linear int8"
          echo "************************************************************"
          python3 torchchat.py generate --device mps --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0
          python3 torchchat.py generate --device mps --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0
          echo "************************************************************"
          echo "*** linear int4"
          echo "************************************************************"
          PYTORCH_ENABLE_MPS_FALLBACK=1 python3 torchchat.py generate --device mps --quant '{"linear:int4" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0
  test-gguf-util:
    strategy:
      matrix:
        runner: [macos-14]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v2
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: 3.10.11
      - name: Print machine info
        run: |
          uname -a
          if [ $(uname -s) == Darwin ]; then
            sysctl machdep.cpu.brand_string
            sysctl machdep.cpu.core_count
          fi
      - name: Install requirements
        run: |
          echo "Installing pip3 packages"
          pip3 install gguf
          ./install/install_requirements.sh
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
          git clone https://github.com/ggerganov/llama.cpp.git
          pushd llama.cpp
          make
          popd
      - name: Download GGUF files
        run: |
          mkdir gguf_files
          wget -O gguf_files/TinyLlama-1.1B-openorca.Q4_0.gguf "https://huggingface.co/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/resolve/main/tinyllama-1.1b-1t-openorca.Q4_0.gguf?download=true"
          ./llama.cpp/llama-quantize --allow-requantize gguf_files/TinyLlama-1.1B-openorca.Q4_0.gguf gguf_files/TinyLlama-1.1B-openorca.Q4_0.requant_F32.gguf F32
      - name: Load files
        run: |
          touch test.py
          echo "from torchchat.utils.gguf_loader import test_by_to_float" >> test.py
          echo "test_by_to_float(\"gguf_files/TinyLlama-1.1B-openorca.Q4_0.gguf\", \"gguf_files/TinyLlama-1.1B-openorca.Q4_0.requant_F32.gguf\")" >> test.py
          cat test.py
          python test.py
          echo "Tests complete."
  test-mps-dtype:
    strategy:
      matrix:
        runner: [macos-m1-stable]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v2
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: 3.10.11
      - name: Print machine info
        run: |
          uname -a
          if [ $(uname -s) == Darwin ]; then
            sysctl machdep.cpu.brand_string
            sysctl machdep.cpu.core_count
          fi
      - name: Run test
        run: |
          export PYTHON_VERSION="3.10"
          set -x
          # NS/MC: Remove previous installation of torch and torchao first
          # as this script does not install anything into conda env but rather as system dep
          pip3 uninstall -y torch || true
          set -eou pipefail
          pip3 uninstall -y torchao || true
          set -eou pipefail
          echo "::group::Print machine info"
          uname -a
          sysctl machdep.cpu.brand_string
          sysctl machdep.cpu.core_count
          echo "::endgroup::"
          echo "::group::Install requirements"
          # Install requirements
          ./install/install_requirements.sh
          ls -la
          pwd
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
          echo "::endgroup::"
          echo "::group::Download checkpoints"
          (
            mkdir -p checkpoints/stories15M
            pushd checkpoints/stories15M
            curl -fsSL -O https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
            curl -fsSL -O https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
            popd
          )
          echo "::endgroup::"
          echo "::group::Run inference"
          export MODEL_PATH=checkpoints/stories15M/stories15M.pt
          export MODEL_NAME=stories15M
          export MODEL_DIR=/tmp
          for DTYPE in float16 float32; do
            # if [ $(uname -s) == Darwin ]; then
            #   export DTYPE=float16
            # fi
            python3 torchchat.py generate --dtype ${DTYPE} --device mps --checkpoint-path ${MODEL_PATH} --temperature 0
            python3 torchchat.py generate --dtype ${DTYPE} --device mps --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0
            python3 torchchat.py generate --dtype ${DTYPE} --device mps --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0
            python3 torchchat.py generate --dtype ${DTYPE} --device mps --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0
            python3 torchchat.py generate --dtype ${DTYPE} --device mps --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0
            PYTORCH_ENABLE_MPS_FALLBACK=1 python3 torchchat.py generate --dtype ${DTYPE} --device mps --quant '{"linear:int4" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0
          done
  compile-gguf:
    strategy:
      matrix:
        runner: [macos-14]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v2
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: 3.10.11
      - name: Print machine info
        run: |
          uname -a
          if [ $(uname -s) == Darwin ]; then
            sysctl machdep.cpu.brand_string
            sysctl machdep.cpu.core_count
          fi
      - name: Install requirements
        run: |
          ./install/install_requirements.sh
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
      - name: Download GGUF
        run: |
          mkdir gguf_files
          export GGUF_PATH=gguf_files/TinyLlama-1.1B-openorca.Q4_0.gguf
          export TOKENIZER_PATH=gguf_files/tokenizer.model
          wget -O ${GGUF_PATH} "https://huggingface.co/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/resolve/main/tinyllama-1.1b-1t-openorca.Q4_0.gguf?download=true"
          wget -O ${TOKENIZER_PATH} https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
      - name: Run inference
        run: |
          export GGUF_PATH=gguf_files/TinyLlama-1.1B-openorca.Q4_0.gguf
          export TOKENIZER_PATH=gguf_files/tokenizer.model
          export MODEL_NAME=TinyLlama-1.1B-openorca.Q4_0.gguf
          export MODEL_DIR=/tmp
          echo "******************************************"
          echo "******* Embed: not quantized *************"
          echo "******************************************"
          echo "Running eager"
          python3 torchchat.py generate --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --device cpu
          echo "Running compiled"
          python3 torchchat.py generate --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile --device cpu
          echo "******************************************"
          echo "******* Emb: channel-wise quantized ******"
          echo "******************************************"
          echo "Running eager"
          python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --device cpu
          echo "Running compiled"
          python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile --device cpu
          echo "******************************************"
          echo "******** Emb: group-wise quantized *******"
          echo "******************************************"
          echo "Running eager"
          python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --device cpu
          echo "Running compiled"
          python3 torchchat.py generate --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --gguf-path ${GGUF_PATH} --tokenizer-path ${TOKENIZER_PATH} --max-new-tokens 20 --temperature 0 --compile --device cpu
          echo "tests complete"
          echo "******************************************"
  runner-et:
    strategy:
      matrix:
        runner: [16-core-ubuntu, macos-14-xlarge]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
        with:
          submodules: true
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: 3.10.11
      - name: Setup Xcode
        if: runner.os == 'macOS'
        uses: maxim-lobanov/setup-xcode@v1
        with:
          xcode-version: '15.3'
      - name: Print machine info
        run: |
          uname -a
          if [ $(uname -s) == Darwin ]; then
            sysctl machdep.cpu.brand_string
            sysctl machdep.cpu.core_count
          fi
      - name: Install torchchat
        run: |
          echo "Installing pip3 packages"
          ./install/install_requirements.sh
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
      - name: Set ET git sha
        id: setup-hash
        run: |
          export TORCHCHAT_ROOT=${PWD}
          echo "et-git-hash=$(cat ${TORCHCHAT_ROOT}/install/.pins/et-pin.txt)" >> "$GITHUB_ENV"
      - name: Load or install ET
        id: install-et
        uses: actions/cache@v4
        with:
          path: |
            ./et-build
            ./torchchat/utils/scripts
          key: et-build-${{runner.os}}-${{runner.arch}}-${{env.et-git-hash}}-${{ hashFiles('**/install_et.sh') }}
      - if: ${{ steps.install-et.outputs.cache-hit != 'true' }}
        continue-on-error: true
        run: |
          echo "Installing ExecuTorch"
          bash torchchat/utils/scripts/install_et.sh
      - name: Install ExecuTorch python
        run: |
          echo "Install ExecuTorch python"
          export TORCHCHAT_ROOT=$PWD
          export ET_BUILD_DIR="et-build"
          ENABLE_ET_PYBIND="${1:-true}"
          source "torchchat/utils/scripts/install_utils.sh"
          install_executorch_python_libs $ENABLE_ET_PYBIND
      - name: Install runner
        run: |
          echo "Installing runner"
          bash torchchat/utils/scripts/build_native.sh et
      - name: Run inference
        run: |
          python torchchat.py download stories15M
          wget -O ./tokenizer.model https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
          export PRMT="Once upon a time in a land far away"
          python torchchat.py generate stories15M --temperature 0 --prompt "${PRMT}" --device cpu
          python torchchat.py export stories15M --output-pte-path ./model.pte
          ./cmake-out/et_run ./model.pte -z ./tokenizer.model -t 0 -i "${PRMT}"
          for dtype in fp32 fp16 bf16; do
            echo "Testing export + runner with dtype=$dtype"
            python torchchat.py export stories15M --dtype $dtype --output-pte-path ./model.pte
            ./cmake-out/et_run ./model.pte -z ./tokenizer.model -t 0 -i "${PRMT}"
          done
          echo "Tests complete."
  runner-aoti:
    strategy:
      matrix:
        runner: [16-core-ubuntu, macos-14-xlarge]
    runs-on: ${{matrix.runner}}
    env:
      TORCHCHAT_ROOT: ${{ github.workspace }}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
        with:
          submodules: true
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10.11'
      - name: Print machine info
        run: |
          echo "$(uname -a)"
      - name: Install dependencies
        run: |
          ./install/install_requirements.sh
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
          bash torchchat/utils/scripts/build_native.sh aoti
      - name: Download checkpoint
        run: |
          mkdir -p checkpoints/stories15M
          pushd checkpoints/stories15M
          wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
          wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
          popd
      - name: Run inference
        run: |
          set -eou pipefail
          export MODEL_DIR=${PWD}/checkpoints/stories15M
          export PROMPT="Once upon a time in a land far away"
          python torchchat.py generate --checkpoint-path ${MODEL_DIR}/stories15M.pt --temperature 0 --prompt "${PROMPT}" --device cpu
          for dtype in fp32 fp16 bf16 fast fast16; do
            echo "Running export + runner with dtype=$dtype"
            python torchchat.py export --checkpoint-path ${MODEL_DIR}/stories15M.pt --dtype $dtype --output-dso-path /tmp/model.so
            ./cmake-out/aoti_run /tmp/model.so -z ${MODEL_DIR}/tokenizer.model -i "${PROMPT}"
          done
          echo "Tests complete."
  test-build-runner-et-android:
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    with:
      runner: linux.4xlarge
      script: |
        uname -a
        if [ $(uname -s) == Darwin ]; then
          sysctl machdep.cpu.brand_string
          sysctl machdep.cpu.core_count
        fi
        ./install/install_requirements.sh
        pip3 list
        python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
        export TORCHCHAT_ROOT=${PWD}
        pushd /tmp
        wget https://dl.google.com/android/repository/android-ndk-r26c-linux.zip
        unzip android-ndk-r26c-linux.zip
        popd
        export ANDROID_NDK=/tmp/android-ndk-r26c
        # Pull submodules (re2, abseil) for Tiktoken
        git submodule sync
        git submodule update --init
        ./runner/build_android.sh
        echo "Tests complete."
  test-torchao-experimental:
    strategy:
      matrix:
        runner: [macos-14-xlarge]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
        with:
          submodules: true
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: 3.10.11
      - name: Setup Xcode
        if: runner.os == 'macOS'
        uses: maxim-lobanov/setup-xcode@v1
        with:
          xcode-version: '15.3'
      - name: Print machine info
        run: |
          uname -a
          if [ $(uname -s) == Darwin ]; then
            sysctl machdep.cpu.brand_string
            sysctl machdep.cpu.core_count
          fi
      - name: Install torchchat
        run: |
          echo "Installing pip3 packages"
          ./install/install_requirements.sh
          pip3 list
          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
      - name: Install torchao-ops
        id: install-torchao-ops
        run: |
          bash torchchat/utils/scripts/build_torchao_ops.sh
      - name: Set git shas
        id: setup-hash
        run: |
          export TORCHCHAT_ROOT=${PWD}
          echo "et-git-hash=$(cat ${TORCHCHAT_ROOT}/install/.pins/et-pin.txt)" >> "$GITHUB_ENV"
      - name: Load or install ET
        id: install-et
        uses: actions/cache@v4
        with:
          path: |
            ./et-build
            ./torchchat/utils/scripts/install_et.sh
          key: et-build-${{runner.os}}-${{runner.arch}}-${{env.et-git-hash}}-${{ hashFiles('**/install_et.sh') }}
      - if: ${{ steps.install-et.outputs.cache-hit != 'true' }}
        continue-on-error: true
        run: |
          echo "Installing ExecuTorch"
          bash torchchat/utils/scripts/install_et.sh
      - name: Install ExecuTorch python
        run: |
          echo "Install ExecuTorch python"
          export TORCHCHAT_ROOT=$PWD
          export ET_BUILD_DIR="et-build"
          ENABLE_ET_PYBIND="${1:-true}"
          source "torchchat/utils/scripts/install_utils.sh"
          install_executorch_python_libs $ENABLE_ET_PYBIND
      - name: Install runner
        run: |
          echo "Installing runner"
          bash torchchat/utils/scripts/build_native.sh et link_torchao_ops
      - name: Install runner AOTI
        id: install-runner-aoti
        run: |
          bash torchchat/utils/scripts/build_native.sh aoti link_torchao_ops
      - name: Run inference
        run: |
          python torchchat.py download stories110M
          wget -O ./tokenizer.model https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
          export PRMT="Once upon a time in a land far away"
          echo "Generate eager"
          python torchchat.py generate stories110M --temperature 0 --prompt "${PRMT}" --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}'
          echo "Generate compile"
          python torchchat.py generate stories110M --temperature 0 --prompt "${PRMT}" --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --compile
          echo "Export and run ET (C++ runner)"
          python torchchat.py export stories110M --output-pte-path ./model.pte --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}'
          ./cmake-out/et_run ./model.pte -z ./tokenizer.model -t 0 -i "${PRMT}"
          echo "Export and run AOTI (C++ runner)"
          python torchchat.py export stories110M --output-dso-path ./model.so --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}'
          ./cmake-out/aoti_run ./model.so -z ./tokenizer.model -t 0 -i "${PRMT}"
          echo "Generate AOTI"
          python torchchat.py generate stories110M --dso-path ./model.so --prompt "${PRMT}"
          echo "Tests complete."