diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
deleted file mode 100644
index c4551dc..0000000
--- a/.github/workflows/build.yaml
+++ /dev/null
@@ -1,119 +0,0 @@
----
-name: Build quantize binary
-
-permissions:
-  contents: read
-
-on:
-  push:
-    branches:
-      - main
-    tags:
-      - "v*"
-  pull_request:
-    branches:
-      - main
-  workflow_dispatch:
-
-jobs:
-  macos-build:
-    name: "Build quantize on macOS ARM64 (M1)"
-    runs-on: "macos-14"
-    steps:
-      - uses: "actions/checkout@v4"
-        with:
-          submodules: true
-
-      - name: system info
-        run: sysctl -a
-
-      - name: make build/quantize from llama.cpp sources
-        env:
-          CMAKE_ARGS: "-DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL_EMBED_LIBRARY=ON"
-        run: make quantize
-
-      - name: file info
-        run: file build/quantize-arm64-darwin
-
-      - name: test quantize
-        run: |
-          build/quantize-arm64-darwin \
-            llama.cpp/models/ggml-vocab-llama.gguf \
-            /tmp/ggml-vocab-Q4_K_M.gguf \
-            Q4_K_M
-
-      - uses: actions/upload-artifact@v4
-        with:
-          name: "quantize-arm64-darwin"
-          path: build/quantize-arm64-darwin
-
-  linux-build:
-    name: "Build quantize on Linux for ${{ matrix.arch }}"
-    runs-on: "ubuntu-latest"
-    strategy:
-      fail-fast: true
-      matrix:
-        include:
-          - arch: "amd64"
-            suffix: "x86_64-linux"
-            image: quay.io/sclorg/python-312-c8s:c8s
-          - arch: "arm64"
-            suffix: "aarch64-linux"
-            image: quay.io/sclorg/python-312-c8s:c8s
-    steps:
-      - uses: "actions/checkout@v4"
-        with:
-          submodules: true
-
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Pull ${{ matrix.image }} for linux/${{ matrix.arch }}
-        run: |
-          docker pull --platform linux/${{ matrix.arch }} ${{ matrix.image }}
-
-      - name: make build/quantize from llama.cpp sources
-        run: |
-          set -e
-          docker run --platform linux/${{ matrix.arch }} ${{ matrix.image }} uname -a
-          docker run --platform linux/${{ matrix.arch }} \
-            -v .:/opt/app-root/src \
-            -e CMAKE_ARGS="-DLLAMA_FATAL_WARNINGS=ON" \
-            ${{ matrix.image }} \
-            make quantize
-
-      - name: file info
-        run: file build/quantize-${{ matrix.suffix }}
-
-      - name: file symbols
-        run: nm -a build/quantize-${{ matrix.suffix }} | grep -o "GLIBC.*" | sort -u
-
-      - name: test quantize
-        run: |
-          docker run --platform linux/${{ matrix.arch }} \
-            -v .:/opt/app-root/src \
-            ${{ matrix.image }} \
-            build/quantize-${{ matrix.suffix }} \
-            llama.cpp/models/ggml-vocab-llama.gguf \
-            /tmp/ggml-vocab-Q4_K_M.gguf \
-            Q4_K_M
-
-      - uses: actions/upload-artifact@v4
-        with:
-          name: "quantize-${{ matrix.suffix }}"
-          path: build/quantize-${{ matrix.suffix }}
-
-  merge-artifacts:
-    name: Merge artifacts
-    runs-on: ubuntu-latest
-    needs:
-      - macos-build
-      - linux-build
-    steps:
-      - name: Merge artifacts
-        uses: actions/upload-artifact/merge@v4
-        with:
-          name: quantize
diff --git a/.github/workflows/pypi.yaml b/.github/workflows/pypi.yaml
index d830efa..4fa14e4 100644
--- a/.github/workflows/pypi.yaml
+++ b/.github/workflows/pypi.yaml
@@ -14,12 +14,6 @@ on:
     types:
       - published
 
-permissions:
-  # allow gh release upload
-  contents: write
-  # see https://docs.pypi.org/trusted-publishers/
-  id-token: write
-
 jobs:
   build-package:
     name: Build and check packages
@@ -29,27 +23,73 @@ jobs:
         with:
           # for setuptools-scm
           fetch-depth: 0
+          submodules: true
 
       - uses: hynek/build-and-inspect-python-package@v2
+        with:
+          skip-wheel: 'false'
+
+  build_wheels:
+    name: Build wheels on ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        # macos-13 is an intel runner, macos-14 is apple silicon
+        os: [ubuntu-latest, ubuntu-24.04-arm, macos-14]
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          # for setuptools-scm
+          fetch-depth: 0
+          submodules: true
+
+      - name: Build wheels
+        uses: pypa/cibuildwheel@v2.22.0
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
+          path: ./wheelhouse/*.whl
+
+  build_sdist:
+    name: Build source distribution
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          # for setuptools-scm
+          fetch-depth: 0
+          submodules: true
+
+      - name: Build sdist
+        run: pipx run build --sdist
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: cibw-sdist
+          path: dist/*.tar.gz
 
   publish-test-pypi:
     name: Publish packages to test.pypi.org
     # environment: publish-test-pypi
-    # TODO: move to instructlab
+    permissions:
+      id-token: write
     if: |
-      github.repository_owner == 'tiran' && (
+      github.repository_owner == 'instructlab' && (
        github.event.action == 'published'
        || (github.event_name == 'push' && github.ref == 'refs/heads/main')
       )
     runs-on: ubuntu-latest
-    needs: build-package
+    needs: [build_wheels, build_sdist]
     steps:
       - name: Fetch build artifacts
         uses: actions/download-artifact@v4
         with:
-          name: Packages
+          pattern: cibw-*
           path: dist
+          merge-multiple: true
 
       - name: Upload to Test PyPI
         uses: pypa/gh-action-pypi-publish@release/v1
@@ -59,20 +99,23 @@ jobs:
 
   publish-pypi:
     name: Publish release to pypi.org
     # environment: publish-pypi
-    # TODO: move to instructlab
+    permissions:
+      contents: write
+      id-token: write
     if: |
-      github.repository_owner == 'tiran' && github.event.action == 'published'
+      github.repository_owner == 'instructlab' && github.event.action == 'published'
     runs-on: ubuntu-latest
-    needs: build-package
+    needs: [build_wheels, build_sdist]
     steps:
       - name: Fetch build artifacts
         uses: actions/download-artifact@v4
         with:
-          name: Packages
+          pattern: cibw-*
           path: dist
+          merge-multiple: true
 
-      - uses: sigstore/gh-action-sigstore-python@v2.1.1
+      - uses: sigstore/gh-action-sigstore-python@v3.0.0
         with:
           inputs: >-
             ./dist/*.tar.gz
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 04fa1ca..31726ee 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -26,11 +26,13 @@ jobs:
           - "3.10"
           - "3.11"
           - "3.12"
-          - "3.13-dev"
+          - "3.13"
     steps:
       - uses: "actions/checkout@v4"
        with:
-          submodules: true
+          submodules: true
+          # for setuptools-scm
+          fetch-depth: 0
 
       - uses: "actions/setup-python@v5"
         with:
@@ -38,7 +40,7 @@
          allow-prereleases: true
 
       - name: "Update pip"
-        run: python -m pip install --upgrade pip setuptools wheel
+        run: python -m pip install --upgrade pip
 
       - name: "Install tox dependencies"
         run: python -m pip install --upgrade tox tox-gh-actions
@@ -55,7 +57,7 @@
          submodules: true
 
       - name: "Update pip"
-        run: python -m pip install --upgrade pip setuptools wheel
+        run: python -m pip install --upgrade pip
 
       - name: "Install tox dependencies"
         run: python -m pip install --upgrade tox
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..79255cd
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,23 @@
+include tox.ini tests.py .pylintrc
+recursive-include llama.cpp *
+exclude llama.cpp/.git
+
+global-exclude gguf.inp gguf.out
+exclude llama.cpp/models/ggml-vocab-aquila.gguf
+exclude llama.cpp/models/ggml-vocab-baichuan.gguf
+exclude llama.cpp/models/ggml-vocab-bert-bge.gguf
+exclude llama.cpp/models/ggml-vocab-command-r.gguf
+exclude llama.cpp/models/ggml-vocab-deepseek-coder.gguf
+exclude llama.cpp/models/ggml-vocab-deepseek-llm.gguf
+exclude llama.cpp/models/ggml-vocab-falcon.gguf
+exclude llama.cpp/models/ggml-vocab-gpt2.gguf
+exclude llama.cpp/models/ggml-vocab-gpt-neox.gguf
+# used in tests.py
+# exclude llama.cpp/models/ggml-vocab-llama-bpe.gguf
+exclude llama.cpp/models/ggml-vocab-llama-spm.gguf
+exclude llama.cpp/models/ggml-vocab-mpt.gguf
+exclude llama.cpp/models/ggml-vocab-phi-3.gguf
+exclude llama.cpp/models/ggml-vocab-qwen2.gguf
+exclude llama.cpp/models/ggml-vocab-refact.gguf
+exclude llama.cpp/models/ggml-vocab-stablelm-3b-4e1t.gguf
+exclude llama.cpp/models/ggml-vocab-starcoder.gguf
diff --git a/Makefile b/Makefile
deleted file mode 100644
index d93152e..0000000
--- a/Makefile
+++ /dev/null
@@ -1,39 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-
-CMAKE_ARGS ?=
-
-UNAME_MACHINE = $(shell uname -m | tr A-Z a-z)
-UNAME_OS = $(shell uname -s | tr A-Z a-z)
-QUANTIZE = build/quantize-$(UNAME_MACHINE)-$(UNAME_OS)
-LLAMA_BUILDDIR = build/llama.cpp-$(UNAME_MACHINE)-$(UNAME_OS)
-LLAMA_DIR = llama.cpp
-
-
-.PHONY: all
-all: test $(QUANTIZE)
-
-.PHONY: test
-test:
-	tox p
-
-.PHONY: fix
-fix:
-	tox -e format --
-	tox -e ruff -- --fix
-
-.PHONY: clean
-clean:
-	rm -rf .tox .ruff_cache dist build
-
-$(LLAMA_BUILDDIR)/Makefile: $(LLAMA_DIR)/CMakeLists.txt
-	@mkdir -p $(dir $@)
-	CMAKE_ARGS="$(CMAKE_ARGS)" cmake -S $(dir $<) -B $(dir $@)
-
-$(LLAMA_BUILDDIR)/bin/quantize: $(LLAMA_BUILDDIR)/Makefile
-	cmake --build $(dir $<) --parallel 2 --config Release --target quantize
-
-.PHONY: quantize
-quantize: $(QUANTIZE)
-
-$(QUANTIZE): $(LLAMA_BUILDDIR)/bin/quantize
-	cp -a $< $@
diff --git a/llama.cpp b/llama.cpp
index 784e11d..b95c8af 160000
--- a/llama.cpp
+++ b/llama.cpp
@@ -1 +1 @@
-Subproject commit 784e11dea1f5ce9638851b2b0dddb107e2a609c8
+Subproject commit b95c8af37ccf169b0a3216b7ed691af0534e5091
diff --git a/pyproject.toml b/pyproject.toml
index cea6f47..ca46d09 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 [build-system]
-requires = ["setuptools>=64", "setuptools_scm>=8"]
+requires = ["setuptools>=64", "setuptools_scm>=8", "wheel"]
 build-backend = "setuptools.build_meta"
 
 [project]
@@ -34,10 +34,12 @@ dynamic = ["version"]
 [project.urls]
 # TODO: move the project to instructlab
 # homepage = "https://instructlab.io"
-source = "https://github.com/tiran/instructlab-quantize"
-issues = "https://github.com/tiran/instructlab-quantize/issues"
+source = "https://github.com/instructlab/instructlab-quantize"
+issues = "https://github.com/instructlab/instructlab-quantize/issues"
 
 [tool.setuptools_scm]
+# do not include +gREV local version, required for Test PyPI upload
+local_scheme = "no-local-version"
 
 [tool.setuptools]
 package-dir = {"" = "src"}
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..d0652e0
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,116 @@
+import os
+import platform
+import subprocess
+import sys
+
+from setuptools import setup
+from setuptools.command.build_py import build_py
+from setuptools.dist import Distribution
+from wheel.bdist_wheel import bdist_wheel as bdist_wheel
+
+CMAKE_ARGS = [
+    "-DCMAKE_BUILD_TYPE=Release",
+    "-DBUILD_SHARED_LIBS=OFF",
+    # build with base ISA
+    "-DGGML_NATIVE=OFF",
+    "-DLLAMA_NATIVE=OFF",
+    "-DLLAMA_BUILD_TESTS=OFF",
+    "-DLLAMA_BUILD_SERVER=OFF",
+]
+CMAKE_ARGS_X86_64 = [
+    # force x86_64-v2 ISA
+    "-DGGML_AVX=OFF",
+    "-DGGML_AVX2=OFF",
+    "-DGGML_FMA=OFF",
+    "-DGGML_F16C=OFF",
+    "-DLLAMA_AVX=OFF",
+    "-DLLAMA_AVX2=OFF",
+    "-DLLAMA_FMA=OFF",
+    "-DLLAMA_F16C=OFF",
+]
+CMAKE_ARGS_DARWIN_AARCH64 = [
+    # build and embed METAL on Apple M
+    "-DGGML_METAL=ON",
+    "-DGGML_METAL_EMBED_LIBRARY=ON",
+    "-DLLAMA_METAL=ON",
+    "-DLLAMA_METAL_EMBED_LIBRARY=ON",
+]
+QUANTIZE_BINARY = "llama-quantize"
+
+
+class Py3NoneBdistWheel(bdist_wheel):
+    """Tag wheel as py3-none-{tag}"""
+
+    def finalize_options(self) -> None:
+        super().finalize_options()
+        self.root_is_pure = False
+
+    def get_tag(self) -> tuple[str, str, str]:
+        _py, _abi, plat_name = super().get_tag()
+        return "py3", "none", plat_name
+
+
+class QuantizeBuildPy(build_py):
+    """Hack to build and copy quantize binary with Python files"""
+
+    def build_quantize(self) -> None:
+        # Switch to scikit-build-core? I have not found an example how to
+        # ship a program with scikit-build-core.
+        arch = platform.uname().machine
+        build_cmd = self.get_finalized_command("build")
+        package_name = self.distribution.packages[0]
+        build_temp = build_cmd.build_temp
+        cmake_args = [
+            "cmake",
+            "-S",
+            "llama.cpp",
+            "-B",
+            build_temp,
+        ]
+        cmake_args.extend(CMAKE_ARGS)
+        if sys.platform == "darwin" and arch == "aarch64":
+            cmake_args.extend(CMAKE_ARGS_DARWIN_AARCH64)
+        elif arch == "x86_64":
+            cmake_args.extend(CMAKE_ARGS_X86_64)
+        print(f"Run {' '.join(cmake_args)}")
+        subprocess.check_call(cmake_args)
+
+        build_args = [
+            "cmake",
+            "--build",
+            build_temp,
+            "--config",
+            "Release",
+            "--target",
+            QUANTIZE_BINARY,
+        ]
+        print(f"Run {' '.join(build_args)}")
+        subprocess.check_call(build_args)
+
+        infile = os.path.join(build_temp, "bin", QUANTIZE_BINARY)
+        outname = f"quantize-{arch}-{sys.platform}"
+        outfile = os.path.join(self.build_lib, package_name, outname)
+        directory = os.path.dirname(outfile)
+        os.makedirs(directory, exist_ok=True)
+        self.copy_file(infile, outfile, preserve_mode=True)
+        self.package_data[package_name] = [outname]
+
+    def run(self) -> None:
+        self.build_quantize()
+        return super().run()
+
+
+class BinaryDistribution(Distribution):
+    """Mark package has platlib package"""
+
+    def has_ext_modules(foo) -> bool:
+        return True
+
+
+setup(
+    distclass=BinaryDistribution,
+    cmdclass={
+        "bdist_wheel": Py3NoneBdistWheel,
+        "build_py": QuantizeBuildPy,
+    },
+)
diff --git a/src/instructlab_quantize/quantize-aarch64-linux b/src/instructlab_quantize/quantize-aarch64-linux
deleted file mode 100755
index 107319d..0000000
Binary files a/src/instructlab_quantize/quantize-aarch64-linux and /dev/null differ
diff --git a/src/instructlab_quantize/quantize-arm64-darwin b/src/instructlab_quantize/quantize-arm64-darwin
deleted file mode 100755
index 83a9a9e..0000000
Binary files a/src/instructlab_quantize/quantize-arm64-darwin and /dev/null differ
diff --git a/src/instructlab_quantize/quantize-x86_64-linux b/src/instructlab_quantize/quantize-x86_64-linux
deleted file mode 100755
index 80461a4..0000000
Binary files a/src/instructlab_quantize/quantize-x86_64-linux and /dev/null differ
diff --git a/tests.py b/tests.py
index 01d120b..3afb0c7 100644
--- a/tests.py
+++ b/tests.py
@@ -7,9 +7,10 @@ import sys
 
 from unittest import mock
 
-import instructlab_quantize
 import pytest
 
+import instructlab_quantize
+
 PKG_DIR = pathlib.Path(instructlab_quantize.__file__).absolute().parent
 
 
@@ -42,7 +43,7 @@ def test_run_quantize(tmp_path: pathlib.Path):
     quant_type = "Q4_K_M"
     outfile = tmp_path / "ggml-vocab-{quant_type}.gguf"
     instructlab_quantize.run_quantize(
-        "llama.cpp/models/ggml-vocab-llama.gguf",
+        "llama.cpp/models/ggml-vocab-llama-bpe.gguf",
         os.fspath(outfile),
         quant_type,
     )
diff --git a/tox.ini b/tox.ini
index c8483a3..2612559 100644
--- a/tox.ini
+++ b/tox.ini
@@ -41,6 +41,16 @@ deps =
 commands =
     ruff format {posargs:--check}
 
+[testenv:fix]
+description = fix code with Ruff
+skip_install = True
+skipsdist = true
+deps =
+    ruff
+commands =
+    ruff format
+    ruff check --fix
+
 [gh-actions]
 python =
     3.9: py39
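
For reference, the wheels produced by the new setup.py expose the bundled llama-quantize binary through instructlab_quantize.run_quantize(). A minimal usage sketch, mirroring the call in tests.py; the output path below is illustrative and not taken from the patch:

    import os
    import pathlib

    import instructlab_quantize

    # Quantize the BPE vocab model from the llama.cpp submodule to Q4_K_M,
    # using the positional arguments exercised by test_run_quantize in tests.py:
    # (input GGUF path, output GGUF path, quantization type).
    outfile = pathlib.Path("/tmp/ggml-vocab-Q4_K_M.gguf")
    instructlab_quantize.run_quantize(
        "llama.cpp/models/ggml-vocab-llama-bpe.gguf",
        os.fspath(outfile),
        "Q4_K_M",
    )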