diff --git a/.github/workflows/more-tests.yml b/.github/workflows/more-tests.yml index 1e0652c96..f47740fe3 100644 --- a/.github/workflows/more-tests.yml +++ b/.github/workflows/more-tests.yml @@ -9,23 +9,17 @@ on: jobs: test-cuda: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu gpu-arch-type: cuda - gpu-arch-version: "12.1" + gpu-arch-version: "12.4" timeout: 60 script: | echo "::group::Print machine info" uname -a echo "::endgroup::" - echo "::group::Install newer objcopy that supports --set-section-alignment" - yum install -y devtoolset-10-binutils - export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH - echo "::endgroup::" - - echo "::group::Download checkpoints" # Install requirements ./install/install_requirements.sh cuda diff --git a/.github/workflows/periodic.yml b/.github/workflows/periodic.yml index a9561e3e8..5a0d9920b 100644 --- a/.github/workflows/periodic.yml +++ b/.github/workflows/periodic.yml @@ -108,7 +108,7 @@ jobs: set -eux PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --event "periodic" --backend "gpu" test-gpu: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main name: test-gpu (${{ matrix.platform }}, ${{ matrix.model_name }}) needs: gather-models-gpu secrets: inherit @@ -119,7 +119,7 @@ jobs: secrets-env: "HF_TOKEN_PERIODIC" runner: ${{ matrix.runner }} gpu-arch-type: cuda - gpu-arch-version: "12.1" + gpu-arch-version: "12.4" script: | echo "::group::Print machine info" nvidia-smi diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index d25c674dd..623b0e80f 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -215,7 +215,7 @@ jobs: set -eux PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --event "pull_request" --backend "gpu" test-gpu-compile: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main name: test-gpu-compile (${{ matrix.platform }}, ${{ matrix.model_name }}) needs: gather-models-gpu strategy: @@ -224,7 +224,7 @@ jobs: with: runner: linux.g5.4xlarge.nvidia.gpu gpu-arch-type: cuda - gpu-arch-version: "12.1" + gpu-arch-version: "12.4" script: | echo "::group::Print machine info" nvidia-smi @@ -250,7 +250,7 @@ jobs: echo "::endgroup::" test-gpu-aoti-bfloat16: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main name: test-gpu-aoti-bfloat16 (${{ matrix.platform }}, ${{ matrix.model_name }}) needs: gather-models-gpu strategy: @@ -259,18 +259,13 @@ jobs: with: runner: linux.g5.4xlarge.nvidia.gpu gpu-arch-type: cuda - gpu-arch-version: "12.1" + gpu-arch-version: "12.4" timeout: 60 script: | echo "::group::Print machine info" nvidia-smi echo "::endgroup::" - echo "::group::Install newer objcopy that supports --set-section-alignment" - yum install -y devtoolset-10-binutils - export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH - echo "::endgroup::" - echo "::group::Install required packages" ./install/install_requirements.sh cuda pip3 list @@ -291,7 +286,7 @@ jobs: echo "::endgroup::" test-gpu-aoti-float32: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main name: test-gpu-aoti-float32 (${{ matrix.platform }}, ${{ matrix.model_name }}) needs: gather-models-gpu strategy: @@ -300,17 +295,12 @@ jobs: with: runner: linux.g5.4xlarge.nvidia.gpu gpu-arch-type: cuda - gpu-arch-version: "12.1" + gpu-arch-version: "12.4" script: | echo "::group::Print machine info" nvidia-smi echo "::endgroup::" - echo "::group::Install newer objcopy that supports --set-section-alignment" - yum install -y devtoolset-10-binutils - export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH - echo "::endgroup::" - echo "::group::Install required packages" ./install/install_requirements.sh cuda pip list @@ -337,7 +327,7 @@ jobs: echo "::endgroup::" test-gpu-aoti-float16: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main name: test-gpu-aoti-float16 (${{ matrix.platform }}, ${{ matrix.model_name }}) needs: gather-models-gpu strategy: @@ -346,17 +336,12 @@ jobs: with: runner: linux.g5.4xlarge.nvidia.gpu gpu-arch-type: cuda - gpu-arch-version: "12.1" + gpu-arch-version: "12.4" script: | echo "::group::Print machine info" nvidia-smi echo "::endgroup::" - echo "::group::Install newer objcopy that supports --set-section-alignment" - yum install -y devtoolset-10-binutils - export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH - echo "::endgroup::" - echo "::group::Install required packages" ./install/install_requirements.sh cuda pip list @@ -384,7 +369,7 @@ jobs: echo "::endgroup::" test-gpu-eval-sanity-check: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main name: test-gpu-eval-sanity-check (${{ matrix.platform }}, ${{ matrix.model_name }}) needs: gather-models-gpu strategy: @@ -393,17 +378,12 @@ jobs: with: runner: linux.g5.4xlarge.nvidia.gpu gpu-arch-type: cuda - gpu-arch-version: "12.1" + gpu-arch-version: "12.4" script: | echo "::group::Print machine info" nvidia-smi echo "::endgroup::" - echo "::group::Install newer objcopy that supports --set-section-alignment" - yum install -y devtoolset-10-binutils - export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH - echo "::endgroup::" - echo "::group::Install required packages" ./install/install_requirements.sh cuda pip3 list @@ -1031,7 +1011,7 @@ jobs: echo "Tests complete." test-build-runner-et-android: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.4xlarge script: | diff --git a/.github/workflows/run-readme-periodic.yml b/.github/workflows/run-readme-periodic.yml index 6a933b5f1..61501e0c4 100644 --- a/.github/workflows/run-readme-periodic.yml +++ b/.github/workflows/run-readme-periodic.yml @@ -10,24 +10,19 @@ on: jobs: test-readme: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main secrets: inherit with: runner: linux.g5.4xlarge.nvidia.gpu secrets-env: "HF_TOKEN_PERIODIC" gpu-arch-type: cuda - gpu-arch-version: "12.1" + gpu-arch-version: "12.4" timeout: 60 script: | echo "::group::Print machine info" uname -a echo "::endgroup::" - echo "::group::Install newer objcopy that supports --set-section-alignment" - yum install -y devtoolset-10-binutils - export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH - echo "::endgroup::" - echo "::group::Create script to run README" python3 torchchat/utils/scripts/updown.py --create-sections --file README.md > ./run-readme.sh # for good measure, if something happened to updown processor, @@ -44,23 +39,18 @@ jobs: test-quantization-any: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu secrets: inherit gpu-arch-type: cuda - gpu-arch-version: "12.1" + gpu-arch-version: "12.4" timeout: 60 script: | echo "::group::Print machine info" uname -a echo "::endgroup::" - echo "::group::Install newer objcopy that supports --set-section-alignment" - yum install -y devtoolset-10-binutils - export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH - echo "::endgroup::" - echo "::group::Create script to run quantization" python3 torchchat/utils/scripts/updown.py --create-sections --file docs/quantization.md > ./run-quantization.sh # for good measure, if something happened to updown processor, @@ -76,24 +66,19 @@ jobs: echo "::endgroup::" test-gguf-any: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main secrets: inherit with: runner: linux.g5.4xlarge.nvidia.gpu secrets-env: "HF_TOKEN_PERIODIC" gpu-arch-type: cuda - gpu-arch-version: "12.1" + gpu-arch-version: "12.4" timeout: 60 script: | echo "::group::Print machine info" uname -a echo "::endgroup::" - echo "::group::Install newer objcopy that supports --set-section-alignment" - yum install -y devtoolset-10-binutils - export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH - echo "::endgroup::" - echo "::group::Create script to run gguf" python3 torchchat/utils/scripts/updown.py --file docs/GGUF.md > ./run-gguf.sh # for good measure, if something happened to updown processor, diff --git a/.github/workflows/run-readme-pr.yml b/.github/workflows/run-readme-pr.yml index 1dc2942ef..8694757e7 100644 --- a/.github/workflows/run-readme-pr.yml +++ b/.github/workflows/run-readme-pr.yml @@ -9,22 +9,17 @@ on: jobs: test-readme-any: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu gpu-arch-type: cuda - gpu-arch-version: "12.1" + gpu-arch-version: "12.4" timeout: 60 script: | echo "::group::Print machine info" uname -a echo "::endgroup::" - echo "::group::Install newer objcopy that supports --set-section-alignment" - yum install -y devtoolset-10-binutils - export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH - echo "::endgroup::" - .ci/scripts/run-docs readme echo "::group::Completion" @@ -33,22 +28,17 @@ jobs: echo "::endgroup::" test-readme-cpu: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu gpu-arch-type: cuda - gpu-arch-version: "12.1" + gpu-arch-version: "12.4" timeout: 60 script: | echo "::group::Print machine info" uname -a echo "::endgroup::" - echo "::group::Install newer objcopy that supports --set-section-alignment" - yum install -y devtoolset-10-binutils - export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH - echo "::endgroup::" - TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs readme echo "::group::Completion" @@ -57,22 +47,17 @@ jobs: echo "::endgroup::" test-quantization-any: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu gpu-arch-type: cuda - gpu-arch-version: "12.1" + gpu-arch-version: "12.4" timeout: 60 script: | echo "::group::Print machine info" uname -a echo "::endgroup::" - echo "::group::Install newer objcopy that supports --set-section-alignment" - yum install -y devtoolset-10-binutils - export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH - echo "::endgroup::" - .ci/scripts/run-docs quantization echo "::group::Completion" @@ -81,41 +66,31 @@ jobs: echo "::endgroup::" test-quantization-cpu: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu gpu-arch-type: cuda - gpu-arch-version: "12.1" + gpu-arch-version: "12.4" timeout: 60 script: | echo "::group::Print machine info" uname -a echo "::endgroup::" - echo "::group::Install newer objcopy that supports --set-section-alignment" - yum install -y devtoolset-10-binutils - export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH - echo "::endgroup::" - TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs quantization test-gguf-any: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu gpu-arch-type: cuda - gpu-arch-version: "12.1" + gpu-arch-version: "12.4" timeout: 60 script: | echo "::group::Print machine info" uname -a echo "::endgroup::" - echo "::group::Install newer objcopy that supports --set-section-alignment" - yum install -y devtoolset-10-binutils - export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH - echo "::endgroup::" - .ci/scripts/run-docs gguf echo "::group::Completion" @@ -124,22 +99,17 @@ jobs: echo "::endgroup::" test-gguf-cpu: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu gpu-arch-type: cuda - gpu-arch-version: "12.1" + gpu-arch-version: "12.4" timeout: 60 script: | echo "::group::Print machine info" uname -a echo "::endgroup::" - echo "::group::Install newer objcopy that supports --set-section-alignment" - yum install -y devtoolset-10-binutils - export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH - echo "::endgroup::" - TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs gguf echo "::group::Completion" @@ -149,22 +119,17 @@ jobs: test-advanced-any: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu gpu-arch-type: cuda - gpu-arch-version: "12.1" + gpu-arch-version: "12.4" timeout: 60 script: | echo "::group::Print machine info" uname -a echo "::endgroup::" - echo "::group::Install newer objcopy that supports --set-section-alignment" - yum install -y devtoolset-10-binutils - export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH - echo "::endgroup::" - .ci/scripts/run-docs advanced echo "::group::Completion" @@ -174,22 +139,17 @@ jobs: test-advanced-cpu: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu gpu-arch-type: cuda - gpu-arch-version: "12.1" + gpu-arch-version: "12.4" timeout: 60 script: | echo "::group::Print machine info" uname -a echo "::endgroup::" - echo "::group::Install newer objcopy that supports --set-section-alignment" - yum install -y devtoolset-10-binutils - export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH - echo "::endgroup::" - TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs advanced echo "::group::Completion" @@ -198,22 +158,17 @@ jobs: echo "::endgroup::" test-evaluation-any: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu gpu-arch-type: cuda - gpu-arch-version: "12.1" + gpu-arch-version: "12.4" timeout: 60 script: | echo "::group::Print machine info" uname -a echo "::endgroup::" - echo "::group::Install newer objcopy that supports --set-section-alignment" - yum install -y devtoolset-10-binutils - export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH - echo "::endgroup::" - .ci/scripts/run-docs evaluation echo "::group::Completion" @@ -222,22 +177,17 @@ jobs: echo "::endgroup::" test-evaluation-cpu: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu gpu-arch-type: cuda - gpu-arch-version: "12.1" + gpu-arch-version: "12.4" timeout: 60 script: | echo "::group::Print machine info" uname -a echo "::endgroup::" - echo "::group::Install newer objcopy that supports --set-section-alignment" - yum install -y devtoolset-10-binutils - export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH - echo "::endgroup::" - TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs evaluation echo "::group::Completion" @@ -246,22 +196,17 @@ jobs: echo "::endgroup::" test-multimodal-any: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu gpu-arch-type: cuda - gpu-arch-version: "12.1" + gpu-arch-version: "12.4" timeout: 60 script: | echo "::group::Print machine info" uname -a echo "::endgroup::" - echo "::group::Install newer objcopy that supports --set-section-alignment" - yum install -y devtoolset-10-binutils - export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH - echo "::endgroup::" - .ci/scripts/run-docs multimodal echo "::group::Completion" @@ -270,22 +215,17 @@ jobs: echo "::endgroup::" test-multimodal-cpu: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu gpu-arch-type: cuda - gpu-arch-version: "12.1" + gpu-arch-version: "12.4" timeout: 60 script: | echo "::group::Print machine info" uname -a echo "::endgroup::" - echo "::group::Install newer objcopy that supports --set-section-alignment" - yum install -y devtoolset-10-binutils - export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH - echo "::endgroup::" - TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs multimodal test-native-any: diff --git a/.github/workflows/runner-cuda-dtype.yml b/.github/workflows/runner-cuda-dtype.yml index b83b9904b..1813f483e 100644 --- a/.github/workflows/runner-cuda-dtype.yml +++ b/.github/workflows/runner-cuda-dtype.yml @@ -9,24 +9,18 @@ on: jobs: test-runner-aot-cuda: - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.g5.4xlarge.nvidia.gpu secrets-env: "HF_TOKEN_PERIODIC" gpu-arch-type: cuda - gpu-arch-version: "12.1" + gpu-arch-version: "12.4" timeout: 60 script: | echo "::group::Print machine info" uname -a echo "::endgroup::" - echo "::group::Install newer objcopy that supports --set-section-alignment" - yum install -y devtoolset-10-binutils - export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH - echo "::endgroup::" - - echo "::group::Download checkpoints" # Install requirements diff --git a/install/.pins/torchao-pin.txt b/install/.pins/torchao-pin.txt index 80a4751bc..c6161e78f 100644 --- a/install/.pins/torchao-pin.txt +++ b/install/.pins/torchao-pin.txt @@ -1 +1 @@ -7d7c14e898eca3fe66138d2a9445755a9270b800 +7d7c14e898eca3fe66138d2a9445755a9270b800 \ No newline at end of file diff --git a/install/install_requirements.sh b/install/install_requirements.sh index eab92a4f1..3db559dbc 100755 --- a/install/install_requirements.sh +++ b/install/install_requirements.sh @@ -62,13 +62,13 @@ echo "Using pip executable: $PIP_EXECUTABLE" # NOTE: If a newly-fetched version of the executorch repo changes the value of # PYTORCH_NIGHTLY_VERSION, you should re-run this script to install the necessary # package versions. -PYTORCH_NIGHTLY_VERSION=dev20241028 +PYTORCH_NIGHTLY_VERSION=dev20241213 # Nightly version for torchvision -VISION_NIGHTLY_VERSION=dev20241028 +VISION_NIGHTLY_VERSION=dev20241213 # Nightly version for torchtune -TUNE_NIGHTLY_VERSION=dev20241013 +TUNE_NIGHTLY_VERSION=dev20241126 # Uninstall triton, as nightly will depend on pytorch-triton, which is one and the same ( @@ -81,7 +81,7 @@ TUNE_NIGHTLY_VERSION=dev20241013 # with cuda for faster execution on cuda GPUs. if [[ -x "$(command -v nvidia-smi)" ]]; then - TORCH_NIGHTLY_URL="https://download.pytorch.org/whl/nightly/cu121" + TORCH_NIGHTLY_URL="https://download.pytorch.org/whl/nightly/cu124" elif [[ -x "$(command -v rocminfo)" ]]; then TORCH_NIGHTLY_URL="https://download.pytorch.org/whl/nightly/rocm6.2" @@ -92,8 +92,8 @@ fi # pip packages needed by exir. REQUIREMENTS_TO_INSTALL=( torch=="2.6.0.${PYTORCH_NIGHTLY_VERSION}" - torchvision=="0.20.0.${VISION_NIGHTLY_VERSION}" - torchtune=="0.4.0.${TUNE_NIGHTLY_VERSION}" + torchvision=="0.22.0.${VISION_NIGHTLY_VERSION}" + torchtune=="0.5.0.${TUNE_NIGHTLY_VERSION}" ) # Install the requirements. --extra-index-url tells pip to look for package @@ -104,9 +104,11 @@ REQUIREMENTS_TO_INSTALL=( "${REQUIREMENTS_TO_INSTALL[@]}" ) +# For torchao need to install from github since nightly build doesn't have macos build. +# TODO: Remove this and install nightly build, once it supports macos ( set -x - $PIP_EXECUTABLE install torchao=="0.5.0" + $PIP_EXECUTABLE install git+https://github.com/pytorch/ao.git@2f97b0955953fa1a46594a27f0df2bc48d93e79d ) if [[ -x "$(command -v nvidia-smi)" ]]; then diff --git a/torchchat/cli/builder.py b/torchchat/cli/builder.py index a39a2ed95..2773c8372 100644 --- a/torchchat/cli/builder.py +++ b/torchchat/cli/builder.py @@ -373,6 +373,8 @@ def _load_model_gguf(builder_args: BuilderArgs) -> Model: kwargs = {} else: kwargs = builder_args.gguf_kwargs + + kwargs.setdefault("device", builder_args.device) model = Model.from_gguf(builder_args.gguf_path, **kwargs) return model @@ -396,6 +398,7 @@ def _load_checkpoint(builder_args: BuilderArgs): os.path.join(builder_args.checkpoint_dir, cp_name), map_location=builder_args.device, mmap=True, + weights_only=False, ) ) checkpoint = {} diff --git a/torchchat/distributed/checkpoint.py b/torchchat/distributed/checkpoint.py index 1830e3a75..11e397469 100644 --- a/torchchat/distributed/checkpoint.py +++ b/torchchat/distributed/checkpoint.py @@ -96,6 +96,7 @@ def _load_checkpoints_from_storage( checkpoint_path, map_location=builder_args.device, mmap=True, + weights_only=False, ) diff --git a/torchchat/utils/gguf_loader.py b/torchchat/utils/gguf_loader.py index 309ff807c..c69bdf469 100644 --- a/torchchat/utils/gguf_loader.py +++ b/torchchat/utils/gguf_loader.py @@ -122,7 +122,7 @@ def linear_int4(input, weight_int4pack, scales_and_zeros, out_features, groupsiz input.dtype ) # cast back to input.dtype else: - c = torch.ops.aten._weight_int4pack_mm( + c = torch.ops.aten._weight_int4pack_mm_for_cpu( input, weight_int4pack, groupsize, @@ -570,6 +570,7 @@ def load_model_and_state_dict( load_state_dict: bool = True, load_as_quantized: bool = True, inner_k_tiles=8, + device="cpu", ) -> torch.nn.Module: """ Parses the GGUF file and returns an nn.Module on meta device along with a state_dict @@ -609,9 +610,17 @@ def load_model_and_state_dict( q, s, z = Q4_0.unpack(t) scales_and_zeros = pack_scales_and_zeros(s, z) q_uint8 = (q[::, ::2] << 4 | q[::, 1::2]).to(torch.uint8) - weight_int4pack = torch.ops.aten._convert_weight_to_int4pack( - q_uint8, inner_k_tiles - ) + + if torch.device(device).type == "cpu": + weight_int4pack = ( + torch.ops.aten._convert_weight_to_int4pack_for_cpu( + q, inner_k_tiles + ) + ) + else: + weight_int4pack = torch.ops.aten._convert_weight_to_int4pack( + q_uint8, inner_k_tiles + ) state_dict[f"{fqn}.weight"] = weight_int4pack state_dict[f"{fqn}.scales_and_zeros"] = scales_and_zeros