From 7a46d6e2569070a4fdeac8dfa317317dfac097a3 Mon Sep 17 00:00:00 2001 From: Zanie Blue Date: Thu, 3 Jul 2025 08:30:41 -0500 Subject: [PATCH] Add zstd support on Unix --- cpython-unix/Makefile | 4 ++ cpython-unix/build-cpython.sh | 2 +- cpython-unix/build-zstd.sh | 63 ++++++++++++++++++++++++++++++ cpython-unix/build.py | 1 + cpython-unix/extension-modules.yml | 7 ++-- cpython-unix/targets.yml | 24 ++++++++++++ src/validation.rs | 4 +- 7 files changed, 98 insertions(+), 7 deletions(-) create mode 100755 cpython-unix/build-zstd.sh diff --git a/cpython-unix/Makefile b/cpython-unix/Makefile index 6e4b0070..dcb239aa 100644 --- a/cpython-unix/Makefile +++ b/cpython-unix/Makefile @@ -236,6 +236,9 @@ $(OUTDIR)/xz-$(XZ_VERSION)-$(PACKAGE_SUFFIX).tar: $(PYTHON_DEP_DEPENDS) $(HERE)/ $(OUTDIR)/zlib-$(ZLIB_VERSION)-$(PACKAGE_SUFFIX).tar: $(PYTHON_DEP_DEPENDS) $(HERE)/build-zlib.sh $(RUN_BUILD) --docker-image $(DOCKER_IMAGE_BUILD) zlib +$(OUTDIR)/zstd-$(ZSTD_VERSION)-$(PACKAGE_SUFFIX).tar: $(PYTHON_DEP_DEPENDS) $(HERE)/build-zstd.sh + $(RUN_BUILD) --docker-image $(DOCKER_IMAGE_BUILD) zstd + PYTHON_HOST_DEPENDS := \ $(PYTHON_DEP_DEPENDS) \ $(HERE)/build-cpython-host.sh \ @@ -272,6 +275,7 @@ PYTHON_DEPENDS_$(1) := \ $$(if $$(NEED_UUID),$$(OUTDIR)/uuid-$$(UUID_VERSION)-$$(PACKAGE_SUFFIX).tar) \ $$(if $$(NEED_XZ),$$(OUTDIR)/xz-$$(XZ_VERSION)-$$(PACKAGE_SUFFIX).tar) \ $$(if $$(NEED_ZLIB),$$(OUTDIR)/zlib-$$(ZLIB_VERSION)-$$(PACKAGE_SUFFIX).tar) \ + $$(if $$(NEED_ZSTD),$$(OUTDIR)/zstd-$$(ZSTD_VERSION)-$$(PACKAGE_SUFFIX).tar) \ $$(NULL) ALL_PYTHON_DEPENDS_$(1) = \ diff --git a/cpython-unix/build-cpython.sh b/cpython-unix/build-cpython.sh index ce88e6c7..4a85ba73 100755 --- a/cpython-unix/build-cpython.sh +++ b/cpython-unix/build-cpython.sh @@ -1213,7 +1213,7 @@ ${BUILD_PYTHON} ${ROOT}/fix_shebangs.py ${ROOT}/out/python/install # downstream consumers. 
OBJECT_DIRS="Objects Parser Parser/lexer Parser/pegen Parser/tokenizer Programs Python Python/deepfreeze" OBJECT_DIRS="${OBJECT_DIRS} Modules" -for ext in _blake2 cjkcodecs _ctypes _ctypes/darwin _decimal _expat _hacl _io _multiprocessing _sha3 _sqlite _sre _testinternalcapi _xxtestfuzz ; do +for ext in _blake2 cjkcodecs _ctypes _ctypes/darwin _decimal _expat _hacl _io _multiprocessing _sha3 _sqlite _sre _testinternalcapi _xxtestfuzz _zstd; do OBJECT_DIRS="${OBJECT_DIRS} Modules/${ext}" done diff --git a/cpython-unix/build-zstd.sh b/cpython-unix/build-zstd.sh new file mode 100755 index 00000000..e81ef781 --- /dev/null +++ b/cpython-unix/build-zstd.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. + +set -ex + +ROOT=`pwd` + +export PATH=${TOOLS_PATH}/${TOOLCHAIN}/bin:${TOOLS_PATH}/host/bin:$PATH +export PREFIX="/tools/deps" + +tar -xf zstd-${ZSTD_VERSION}.tar.gz + +pushd cpython-source-deps-zstd-${ZSTD_VERSION}/lib + +if [ "${CC}" = "musl-clang" ]; then + # In order to build the library with SSE2, BMI, and AVX2 intrinsics, we need musl-clang to find + # headers that provide access to the intrinsics, as they are not provided by musl. These are + # part of the include files that are part of clang. But musl-clang eliminates them from the + # default include path. So copy them into place. 
+ for h in ${TOOLS_PATH}/${TOOLCHAIN}/lib/clang/*/include/*intrin.h ${TOOLS_PATH}/${TOOLCHAIN}/lib/clang/*/include/{__wmmintrin_aes.h,__wmmintrin_pclmul.h,emmintrin.h,immintrin.h,mm_malloc.h}; do + filename=$(basename "$h") + if [ -e "${TOOLS_PATH}/host/include/${filename}" ]; then + echo "warning: ${filename} already exists" + fi + cp "$h" ${TOOLS_PATH}/host/include/ + done + EXTRA_TARGET_CFLAGS="${EXTRA_TARGET_CFLAGS} -I${TOOLS_PATH}/host/include/" + + # `qsort_r` is only available in musl 1.2.3+ but we use 1.2.2. The zstd source provides a + # fallback implementation, but they do not have a `configure`-style detection of whether + # `qsort_r` is actually available so we patch it to include a check for glibc. + patch -p1 <suffix, ctx->suffixSize, sizeof(U32), + ctx, + (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp)); +-#elif defined(_GNU_SOURCE) ++#elif defined(_GNU_SOURCE) && defined(__GLIBC__) + qsort_r(ctx->suffix, ctx->suffixSize, sizeof(U32), + (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp), + ctx); +EOF +fi + +CFLAGS="${EXTRA_TARGET_CFLAGS} -fPIC" LDFLAGS="${EXTRA_TARGET_LDFLAGS}" make -j ${NUM_CPUS} libzstd.a +make -j ${NUM_CPUS} install-static DESTDIR=${ROOT}/out +make -j ${NUM_CPUS} install-includes DESTDIR=${ROOT}/out +make -j ${NUM_CPUS} install-pc DESTDIR=${ROOT}/out diff --git a/cpython-unix/build.py b/cpython-unix/build.py index 01d39b2a..b35d1721 100755 --- a/cpython-unix/build.py +++ b/cpython-unix/build.py @@ -1163,6 +1163,7 @@ def main(): "xtrans", "xz", "zlib", + "zstd", ): tools_path = "host" if action in ("m4", "patchelf") else "deps" diff --git a/cpython-unix/extension-modules.yml b/cpython-unix/extension-modules.yml index 3d69f049..4b834563 100644 --- a/cpython-unix/extension-modules.yml +++ b/cpython-unix/extension-modules.yml @@ -772,15 +772,14 @@ _xxtestfuzz: - _xxtestfuzz/fuzzer.c _zstd: - # Disable on all targets until we add a zstd library - disabled-targets: - - .* minimum-python-version: '3.14' sources: - 
_zstd/_zstdmodule.c - - _zstd/zdict.c + - _zstd/zstddict.c - _zstd/compressor.c - _zstd/decompressor.c + links: + - zstd _zoneinfo: minimum-python-version: "3.9" diff --git a/cpython-unix/targets.yml b/cpython-unix/targets.yml index 95ef58b2..33db80c8 100644 --- a/cpython-unix/targets.yml +++ b/cpython-unix/targets.yml @@ -108,6 +108,7 @@ aarch64-apple-darwin: - tk - uuid - xz + - zstd openssl_target: darwin64-arm64-cc aarch64-apple-ios: @@ -151,6 +152,7 @@ aarch64-apple-ios: - openssl-3.0 - sqlite - xz + - zstd openssl_target: ios64-cross aarch64-unknown-linux-gnu: @@ -198,6 +200,7 @@ aarch64-unknown-linux-gnu: - xorgproto - xz - zlib + - zstd openssl_target: linux-aarch64 # Blocked on: # BOLT-ERROR: Cannot relax adr in non-simple function @@ -245,6 +248,7 @@ arm64-apple-tvos: - openssl-3.0 - sqlite - xz + - zstd openssl_target: todo armv7-unknown-linux-gnueabi: @@ -286,6 +290,7 @@ armv7-unknown-linux-gnueabi: - xorgproto - xz - zlib + - zstd openssl_target: linux-armv4 armv7-unknown-linux-gnueabihf: @@ -327,6 +332,7 @@ armv7-unknown-linux-gnueabihf: - xorgproto - xz - zlib + - zstd openssl_target: linux-armv4 mips-unknown-linux-gnu: @@ -368,6 +374,7 @@ mips-unknown-linux-gnu: - xorgproto - xz - zlib + - zstd openssl_target: linux-mips32 mipsel-unknown-linux-gnu: @@ -409,6 +416,7 @@ mipsel-unknown-linux-gnu: - xorgproto - xz - zlib + - zstd openssl_target: linux-mips32 ppc64le-unknown-linux-gnu: @@ -450,6 +458,7 @@ ppc64le-unknown-linux-gnu: - xorgproto - xz - zlib + - zstd openssl_target: linux-ppc64le riscv64-unknown-linux-gnu: @@ -491,6 +500,7 @@ riscv64-unknown-linux-gnu: - xorgproto - xz - zlib + - zstd openssl_target: linux64-riscv64 s390x-unknown-linux-gnu: @@ -532,6 +542,7 @@ s390x-unknown-linux-gnu: - xorgproto - xz - zlib + - zstd openssl_target: linux64-s390x thumb7k-apple-watchos: @@ -574,6 +585,7 @@ thumb7k-apple-watchos: - openssl-3.0 - sqlite - xz + - zstd openssl_target: todo # Intel macOS. 
@@ -629,6 +641,7 @@ x86_64-apple-darwin: - tk - uuid - xz + - zstd openssl_target: darwin64-x86_64-cc x86_64-apple-ios: @@ -672,6 +685,7 @@ x86_64-apple-ios: - openssl-3.0 - sqlite - xz + - zstd openssl_target: darwin64-x86_64-cc x86_64-apple-tvos: @@ -714,6 +728,7 @@ x86_64-apple-tvos: - openssl-3.0 - sqlite - xz + - zstd openssl_target: todo x86_64-apple-watchos: @@ -756,6 +771,7 @@ x86_64-apple-watchos: - openssl-3.0 - sqlite - xz + - zstd openssl_target: todo x86_64-unknown-linux-gnu: @@ -801,6 +817,7 @@ x86_64-unknown-linux-gnu: - xorgproto - xz - zlib + - zstd openssl_target: linux-x86_64 bolt_capable: true @@ -848,6 +865,7 @@ x86_64_v2-unknown-linux-gnu: - xorgproto - xz - zlib + - zstd openssl_target: linux-x86_64 bolt_capable: true @@ -895,6 +913,7 @@ x86_64_v3-unknown-linux-gnu: - xorgproto - xz - zlib + - zstd openssl_target: linux-x86_64 bolt_capable: true @@ -942,6 +961,7 @@ x86_64_v4-unknown-linux-gnu: - xorgproto - xz - zlib + - zstd openssl_target: linux-x86_64 bolt_capable: true @@ -987,6 +1007,7 @@ x86_64-unknown-linux-musl: - xorgproto - xz - zlib + - zstd openssl_target: linux-x86_64 x86_64_v2-unknown-linux-musl: @@ -1032,6 +1053,7 @@ x86_64_v2-unknown-linux-musl: - xorgproto - xz - zlib + - zstd openssl_target: linux-x86_64 x86_64_v3-unknown-linux-musl: @@ -1077,6 +1099,7 @@ x86_64_v3-unknown-linux-musl: - xorgproto - xz - zlib + - zstd openssl_target: linux-x86_64 x86_64_v4-unknown-linux-musl: @@ -1122,4 +1145,5 @@ x86_64_v4-unknown-linux-musl: - xorgproto - xz - zlib + - zstd openssl_target: linux-x86_64 diff --git a/src/validation.rs b/src/validation.rs index 9084754b..5ab71708 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -778,6 +778,7 @@ const GLOBAL_EXTENSIONS_PYTHON_3_14: &[&str] = &[ "_zoneinfo", "_hmac", "_types", + "_zstd", ]; const GLOBAL_EXTENSIONS_MACOS: &[&str] = &["_scproxy"]; @@ -813,8 +814,7 @@ const GLOBAL_EXTENSIONS_WINDOWS: &[&str] = &[ "winsound", ]; -// TODO(zanieb): Move `_zstd` to non-Windows specific once we 
add support on Unix. -const GLOBAL_EXTENSIONS_WINDOWS_3_14: &[&str] = &["_wmi", "_zstd"]; +const GLOBAL_EXTENSIONS_WINDOWS_3_14: &[&str] = &["_wmi"]; const GLOBAL_EXTENSIONS_WINDOWS_PRE_3_13: &[&str] = &["_msi"];