diff --git a/WORKSPACE b/WORKSPACE index 6acf475d00ee2..fdee27a3048ac 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -155,11 +155,6 @@ rules_foreign_cc_dependencies(make_version = "4.2") # All of the clang related tools are provided under the `@llvm_toolchain_llvm` # repo. To see what's available run `bazel query @llvm_toolchain_llvm//...`. -# Version of the "toolchains_llvm" rule set, _not_ the version of clang/llvm. -TOOLCHAINS_LLVM_VERSION = "1.0.0" - -TOOLCHAINS_LLVM_INTEGRITY = "sha256-6RxDYfmQEaVIFOGvvlxDbg0ymHEUajzVjCOitK+1Bzc=" - # System roots that we use, this is where clang will search for things like libc. _SYSROOT_DARWIN_BUILD_FILE = """ @@ -222,15 +217,21 @@ http_archive( LLVM_VERSION = "18.1.8" # We have a few variants of our clang toolchain, either improving how it's built or adding new tools. -LLVM_VERSION_SUFFIX = "4" +LLVM_VERSION_SUFFIX = "5" + +# Version of the "toolchains_llvm" rule set, _not_ the version of clang/llvm. +# +# We depend on a specific upstream commit (pinned by hash below) that includes a change we need; the link to that change is missing here (TODO: restore). +TOOLCHAINS_LLVM_VERSION = "9f0a7cb0f752ffd430a5c80d749a2e84cb348876" + +TOOLCHAINS_LLVM_INTEGRITY = "sha256-9SY8+RwP3KPfaLtjQGzJmknOcxEpTkmu/h1ntaljYdw=" maybe( http_archive, name = "toolchains_llvm", - canonical_id = "{0}".format(TOOLCHAINS_LLVM_VERSION), integrity = TOOLCHAINS_LLVM_INTEGRITY, strip_prefix = "toolchains_llvm-{0}".format(TOOLCHAINS_LLVM_VERSION), - url = "https://github.com/bazel-contrib/toolchains_llvm/releases/download/{0}/toolchains_llvm-{0}.tar.gz".format(TOOLCHAINS_LLVM_VERSION), + url = "https://github.com/bazel-contrib/toolchains_llvm/archive/{0}.tar.gz".format(TOOLCHAINS_LLVM_VERSION), ) load("@toolchains_llvm//toolchain:deps.bzl", "bazel_toolchain_dependencies") @@ -243,10 +244,10 @@ llvm_toolchain( name = "llvm_toolchain", llvm_version = LLVM_VERSION, sha256 = { - "darwin-aarch64": "41d8dea52d18c4e8b90c4fcd31965f9f297df9f40a38a33d60748dbe7f8330b8", - "darwin-x86_64": "291b8dd844aa896b98393c5d3beaee57f294768039eacdf9ef5e96ed9d3f62d7", - "linux-aarch64": 
"fe8f9e283ab43e963daf9ffb18742e134ad239b56078d61ef9a289ff642784ed", - "linux-x86_64": "8b725ec14e48bc1cb3698309506e29cd94ff3b823976ebb306e9c3ef84480c16", + "darwin-aarch64": "d28437d58615ccae434c289bcf5e0d10f031c732d7b60a5090541dbbbd381a01", + "darwin-x86_64": "f0c103bc2a19ffee7b55df2798188f085756e5668adc7c18d05c7f6705871828", + "linux-aarch64": "e9695cb9a8d068236b1c902af877be396eb0110d87525db49c68911b571f6976", + "linux-x86_64": "4d0bbec4acd23006d1353d0cd255144df8c910981f5474f3030bfde3dc75ccfa", }, sysroot = { "darwin-aarch64": "@sysroot_darwin_universal//:sysroot", @@ -338,7 +339,7 @@ RUST_VERSION = "1.83.0" RUST_NIGHTLY_VERSION = "nightly/2024-12-02" -load("//misc/bazel/toolchains:rust.bzl", "rust_toolchains") +load("//misc/bazel/toolchains:rust.bzl", "bindgen_toolchains", "rust_toolchains") rust_toolchains( [ @@ -413,6 +414,25 @@ rust_toolchains( }, ) +# Rust `bindgen` +# +# Rules and Toolchains for running [`bindgen`](https://github.com/rust-lang/rust-bindgen), +# a tool for generating Rust FFI bindings to C. + +load("@rules_rust//bindgen:repositories.bzl", "rust_bindgen_dependencies") + +rust_bindgen_dependencies() + +bindgen_toolchains( + "{0}-{1}".format(LLVM_VERSION, LLVM_VERSION_SUFFIX), + { + "darwin_aarch64": "sha256-YHP+DGnW3QmbKi93KYVOEq7oym0cbJga0MOgarWksds=", + "darwin_x86_64": "sha256-fA4eCuliHPYf3DBqsSVIQ53TyCfIUJJ3LaM5U3ZxxRc=", + "linux_aarch64": "sha256-GIB3n6vX/jpyFn2e7hI/pRzlP0fPeVhkVu2YDQUDS3A=", + "linux_x86_64": "sha256-a8BzSw50LoMYtEZretLgqizUFcpGpBGYa18rT9EnFgI=", + }, +) + # Load all dependencies for crate_universe. load("@rules_rust//crate_universe:repositories.bzl", "crate_universe_dependencies") @@ -438,33 +458,16 @@ crates_repository( deps = [":decnumber"], )], "librocksdb-sys": [crate.annotation( - additive_build_file = "@//misc/bazel/c_deps:rust-sys/BUILD.rocksdb.bazel", # Note: The below targets are from the additive build file. 
- # - # HACK(parkmycar): The `librocksdb-sys` build script runs bindgen for us, and to - # support cross compiling we need to provide the sysroot to the build script so - # bindgen can find it. Providing the sysroot and relying on the raw paths is quite - # fragile, the fix is to use `@rules_rust//bindgen/...` rules with our Clang toolchain. - build_script_data = [ - ":rocksdb_lib", - ":rocksdb_include", - ":snappy_lib", - "@linux_sysroot-aarch64//:sysroot", - "@linux_sysroot-x86_64//:sysroot", - ], - build_script_env = { - "ROCKSDB_STATIC": "true", - "ROCKSDB_LIB_DIR": "$(execpath :rocksdb_lib)", - "ROCKSDB_INCLUDE_DIR": "$(execpath :rocksdb_include)", - "SNAPPY_STATIC": "true", - "SNAPPY_LIB_DIR": "$(execpath :snappy_lib)", - "BINDGEN_EXTRA_CLANG_ARGS_aarch64-unknown-linux-gnu": "--sysroot=external/linux_sysroot-aarch64", - "BINDGEN_EXTRA_CLANG_ARGS_x86_64-unknown-linux-gnu": "--sysroot=external/linux_sysroot-x86_64", + additive_build_file = "@//misc/bazel/c_deps:rust-sys/BUILD.rocksdb.bazel", + compile_data = [":out_dir"], + gen_build_script = False, + rustc_env = { + "OUT_DIR": "$(execpath :out_dir)", }, - compile_data = [ - ":rocksdb_lib", - ":rocksdb_include", - ":snappy_lib", + deps = [ + ":bindings", + ":rocksdb", ], )], "tikv-jemalloc-sys": [crate.annotation( diff --git a/misc/bazel/c_deps/rust-sys/BUILD.rocksdb.bazel b/misc/bazel/c_deps/rust-sys/BUILD.rocksdb.bazel index f7300c7173e47..0193c237b47e6 100644 --- a/misc/bazel/c_deps/rust-sys/BUILD.rocksdb.bazel +++ b/misc/bazel/c_deps/rust-sys/BUILD.rocksdb.bazel @@ -13,105 +13,36 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-load("@aspect_bazel_lib//lib:copy_file.bzl", "copy_file") +"""Additive BUILD file for the librocksdb-sys Rust crate.""" + load("@aspect_bazel_lib//lib:copy_to_directory.bzl", "copy_to_directory") load("@bazel_skylib//rules:select_file.bzl", "select_file") load("@rules_foreign_cc//foreign_cc:defs.bzl", "cmake") +load("@rules_rust//bindgen:defs.bzl", "rust_bindgen") -"""Additive BUILD file for the librocksdb-sys Rust crate.""" - -# Copied from https://github.com/tensorflow/tensorflow/blob/bdd8bf316e4ab7d699127d192d30eb614a158462/third_party/snappy.BUILD +# Derived from an external BUILD definition for snappy (source link missing here; TODO: restore). cc_library( name = "snappy", srcs = [ "snappy/snappy.cc", - "snappy/snappy.h", - "snappy/snappy-internal.h", + "snappy/snappy-c.cc", "snappy/snappy-sinksource.cc", - "snappy/snappy-sinksource.h", - "snappy/snappy-stubs-internal.cc", - "snappy/snappy-stubs-internal.h", - ":config_h", - ":snappy_stubs_public_h", ], hdrs = [ + "snappy-stubs-public.h", "snappy/snappy.h", + "snappy/snappy-c.h", + "snappy/snappy-internal.h", + "snappy/snappy-sinksource.h", + "snappy/snappy-stubs-internal.h", ], - copts = [ - "-DHAVE_CONFIG_H", - "-fno-exceptions", - "-Wno-sign-compare", - "-Wno-shift-negative-value", - "-Wno-implicit-function-declaration", - ], - defines = ["HAVE_SYS_UIO_H"], - includes = ["snappy/."], -) - -genrule( - name = "config_h", - outs = ["snappy/config.h"], - cmd = "\n".join([ - "cat <<'EOF' >$@", - "#define HAVE_STDDEF_H 1", - "#define HAVE_STDINT_H 1", - "", - "#ifdef __has_builtin", - "# if !defined(HAVE_BUILTIN_EXPECT) && __has_builtin(__builtin_expect)", - "# define HAVE_BUILTIN_EXPECT 1", - "# endif", - "# if !defined(HAVE_BUILTIN_CTZ) && __has_builtin(__builtin_ctzll)", - "# define HAVE_BUILTIN_CTZ 1", - "# endif", - "#elif defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 4)", - "# ifndef HAVE_BUILTIN_EXPECT", - "# define HAVE_BUILTIN_EXPECT 1", - "# endif", - "# ifndef HAVE_BUILTIN_CTZ", - "# define HAVE_BUILTIN_CTZ 1", - "# endif", - "#endif", - "", - "#ifdef 
__has_include", - "# if !defined(HAVE_BYTESWAP_H) && __has_include(<byteswap.h>)", - "# define HAVE_BYTESWAP_H 1", - "# endif", - "# if !defined(HAVE_UNISTD_H) && __has_include(<unistd.h>)", - "# define HAVE_UNISTD_H 1", - "# endif", - "# if !defined(HAVE_SYS_ENDIAN_H) && __has_include(<sys/endian.h>)", - "# define HAVE_SYS_ENDIAN_H 1", - "# endif", - "# if !defined(HAVE_SYS_MMAN_H) && __has_include(<sys/mman.h>)", - "# define HAVE_SYS_MMAN_H 1", - "# endif", - "# if !defined(HAVE_SYS_UIO_H) && __has_include(<sys/uio.h>)", - "# define HAVE_SYS_UIO_H 1", - "# endif", - "#endif", + copts = ["-std=c++11"], + includes = [ "", - "#ifndef SNAPPY_IS_BIG_ENDIAN", - "# ifdef __s390x__", - "# define SNAPPY_IS_BIG_ENDIAN 1", - "# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__", - "# define SNAPPY_IS_BIG_ENDIAN 1", - "# endif", - "#endif", - "EOF", - ]), -) - -genrule( - name = "snappy_stubs_public_h", - srcs = ["snappy/snappy-stubs-public.h.in"], - outs = ["snappy/snappy-stubs-public.h"], - cmd = ("sed " + - "-e 's/$${\\(.*\\)_01}/\\1/g' " + - "-e 's/$${SNAPPY_MAJOR}/1/g' " + - "-e 's/$${SNAPPY_MINOR}/1/g' " + - "-e 's/$${SNAPPY_PATCHLEVEL}/4/g' " + - "$< >$@"), + "snappy", + ], + local_defines = ["NDEBUG=1"], ) filegroup( @@ -174,6 +105,7 @@ cmake( }, no_match_error = "Building rocksdb for the specified CPU is not supported.", ), + includes = ["include/rocksdb/c.h"], lib_source = ":rocksdb_srcs", out_static_libs = ["librocksdb.a"], targets = ["rocksdb"], @@ -186,52 +118,44 @@ cmake( ], ) -filegroup( - name = "out_dir", - srcs = [":rocksdb"], - visibility = ["//visibility:public"], -) - select_file( - name = "librocksdb", - srcs = ":out_dir", + name = "librocksdb_a", + srcs = ":rocksdb", subpath = "librocksdb.a", ) -copy_file( - name = "librocksdb_copy", - src = ":librocksdb", - out = "librocksdb.a", - allow_symlink = False, -) - -copy_to_directory( - name = "rocksdb_lib", - srcs = [":librocksdb_copy"], - visibility = ["//visibility:public"], +filegroup( + name = 
"rocksdb_include", + srcs = glob( + include = ["rocksdb/include/rocksdb/**/*.h"], + ), ) -# Copy the include folder so we can specify `ROCKSDB_INCLUDE_DIR` -# -# Note: We used to use `select_file` here but it generated symlinks that -# overlapped with others and spammed the logs with WARNINGs. -copy_to_directory( - name = "rocksdb_include", - srcs = [":out_dir"], - out = "include", - root_paths = ["rocksdb/include"], - visibility = ["//visibility:public"], +# We need to expose the header files with the rocksdb static lib. The `cmake` +# rule doesn't give us a way to do that, so we manually piece it together. +cc_import( + name = "librocksdb", + hdrs = [":rocksdb_include"], + static_library = ":librocksdb_a", ) -select_file( - name = "libsnappy", - srcs = ":snappy", - subpath = "libsnappy.a", +rust_bindgen( + name = "bindings", + bindgen_flags = [ + "--no-derive-debug", + "--blocklist-type=max_align_t", + "--ctypes-prefix=libc", + ], + cc_lib = ":librocksdb", + header = "rocksdb/include/rocksdb/c.h", ) +# Place the generated artifacts into an OUT_DIR. +# +# TODO(parkmycar): (description missing here; restore or remove). copy_to_directory( - name = "snappy_lib", - srcs = [":libsnappy"], + name = "out_dir", + srcs = [":bindings"], visibility = ["//visibility:public"], ) diff --git a/misc/bazel/toolchains/BUILD.bazel b/misc/bazel/toolchains/BUILD.bazel index 659c6c9e9d459..149518c29e8fa 100644 --- a/misc/bazel/toolchains/BUILD.bazel +++ b/misc/bazel/toolchains/BUILD.bazel @@ -1 +1,116 @@ -visibility = ["//visibility:public"] +# Copyright Materialize, Inc. and contributors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License in the LICENSE file at the +# root of this repository, or online at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Registering toolchains. + +We should only be _manually_ registering esoteric toolchains, more common +toolchains like C or Rust should be handled by a higher level rule set, e.g. +[`toolchains_llvm`](https://github.com/bazel-contrib/toolchains_llvm) or +[`rules_rust`](https://github.com/bazelbuild/rules_rust) respectively. + +Note: These registrations live here, and not in the `WORKSPACE` file or a +`.bzl` file because of Bazel limitations. + +See: (reference link missing here; TODO: restore) +""" + +load("@rules_rust//bindgen:defs.bzl", "rust_bindgen_toolchain") + +# Rust Bindgen Toolchains +# +# [`bindgen`](https://github.com/rust-lang/rust-bindgen) automatically generates Rust FFI +# bindings to C libraries using `clang`. `rules_rust` provides the `bindgen` CLI tool +# and we need to provide the necessary parts of a `clang` toolchain. 
+ +# Darwin aarch64 +rust_bindgen_toolchain( + name = "bindgen_toolchain_darwin__aarch64", + bindgen = "@rules_rust//bindgen/3rdparty:bindgen", + clang = "@rust_bindgen__darwin_aarch64//:clang", + libclang = "@rust_bindgen__darwin_aarch64//:libclang", + libstdcxx = "@rust_bindgen__darwin_aarch64//:libc++", +) + +toolchain( + name = "rust_bindgen_toolchain__darwin_aarch64", + exec_compatible_with = [ + "@platforms//os:macos", + "@platforms//cpu:aarch64", + ], + toolchain = "bindgen_toolchain_darwin__aarch64", + toolchain_type = "@rules_rust//bindgen:toolchain_type", + visibility = ["//visibility:public"], +) + +# Darwin x86_64 +rust_bindgen_toolchain( + name = "bindgen_toolchain_darwin__x86_64", + bindgen = "@rules_rust//bindgen/3rdparty:bindgen", + clang = "@rust_bindgen__darwin_x86_64//:clang", + libclang = "@rust_bindgen__darwin_x86_64//:libclang", + libstdcxx = "@rust_bindgen__darwin_x86_64//:libc++", +) + +toolchain( + name = "rust_bindgen_toolchain__darwin_x86_64", + exec_compatible_with = [ + "@platforms//os:macos", + "@platforms//cpu:x86_64", + ], + toolchain = "bindgen_toolchain_darwin__x86_64", + toolchain_type = "@rules_rust//bindgen:toolchain_type", + visibility = ["//visibility:public"], +) + +# Linux aarch64 +rust_bindgen_toolchain( + name = "bindgen_toolchain_linux__aarch64", + bindgen = "@rules_rust//bindgen/3rdparty:bindgen", + clang = "@rust_bindgen__linux_aarch64//:clang", + libclang = "@rust_bindgen__linux_aarch64//:libclang", + libstdcxx = "@rust_bindgen__linux_aarch64//:libc++", +) + +toolchain( + name = "rust_bindgen_toolchain__linux_aarch64", + exec_compatible_with = [ + "@platforms//os:linux", + "@platforms//cpu:aarch64", + ], + toolchain = "bindgen_toolchain_linux__aarch64", + toolchain_type = "@rules_rust//bindgen:toolchain_type", + visibility = ["//visibility:public"], +) + +# Linux x86_64 +rust_bindgen_toolchain( + name = "bindgen_toolchain_linux__x86_64", + bindgen = "@rules_rust//bindgen/3rdparty:bindgen", + clang = "@rust_bindgen__linux_x86_64//:clang", + 
libclang = "@rust_bindgen__linux_x86_64//:libclang", + libstdcxx = "@rust_bindgen__linux_x86_64//:libc++", +) + +toolchain( + name = "rust_bindgen_toolchain__linux_x86_64", + exec_compatible_with = [ + "@platforms//os:linux", + "@platforms//cpu:x86_64", + ], + toolchain = "bindgen_toolchain_linux__x86_64", + toolchain_type = "@rules_rust//bindgen:toolchain_type", + visibility = ["//visibility:public"], +) diff --git a/misc/bazel/toolchains/rust.bzl b/misc/bazel/toolchains/rust.bzl index ddb6926112e7f..c25fcd350b859 100644 --- a/misc/bazel/toolchains/rust.bzl +++ b/misc/bazel/toolchains/rust.bzl @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") +load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") load("@rules_rust//rust:repositories.bzl", "DEFAULT_TOOLCHAIN_TRIPLES", "rust_repository_set") def rust_toolchains(versions, targets): @@ -84,3 +86,52 @@ def _integrity_key(version, target, component): return "{0}/{1}-{2}-{3}".format(date, component, channel, target) else: return "{0}-{1}-{2}".format(component, version, target) + +_BINDGEN_TOOLCHAIN_BUILD_FILE = """ +package(default_visibility = ["//visibility:public"]) + +sh_binary( + name = "clang", + srcs = ["bin/clang"], +) + +cc_import( + name = "libclang", + shared_library = "lib/libclang.{SHARED_EXTENSION}", +) + +cc_import( + name = "libc++", + shared_library = "lib/{STDCXX}" +) +""" + +def bindgen_toolchains(clang_release, targets): + """ + Macro that fetches a libclang archive and registers a [Rust bindgen] toolchain for each of the provided targets. + + [Rust bindgen](https://github.com/rust-lang/rust-bindgen) + + Args: + clang_release (string): Release of the clang toolchain to fetch; it is substituted into the `clang-<release>` tag of the download URL. + targets (dict[string, string]): Map of platform name to the integrity of + the libclang archive fetched for that platform. 
+ """ + + for (platform, integrity) in targets.items(): + if platform.startswith("darwin"): + shared_extension = "dylib" + stdcxx = "libc++.1.0.dylib" + else: + shared_extension = "so" + stdcxx = "libc++.so.1.0" + + maybe( + http_archive, + name = "rust_bindgen__{0}".format(platform), + build_file_content = _BINDGEN_TOOLCHAIN_BUILD_FILE.format(SHARED_EXTENSION = shared_extension, STDCXX = stdcxx), + integrity = integrity, + url = "https://github.com/MaterializeInc/toolchains/releases/download/clang-{0}/{1}_libclang.tar.zst".format(clang_release, platform), + ) + + native.register_toolchains("@//misc/bazel/toolchains:rust_bindgen_toolchain__{0}".format(platform)) diff --git a/misc/images/bazel/Dockerfile b/misc/images/bazel/Dockerfile index 490f550661c85..28f576984b71e 100644 --- a/misc/images/bazel/Dockerfile +++ b/misc/images/bazel/Dockerfile @@ -15,13 +15,16 @@ RUN apt-get update \ && TZ=UTC DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ ca-certificates \ curl \ - g++ \ - gcc + git \ + libxml2-dev -# Download the bazel binary from the official GitHub releases since the apt repositories do not -# contain arm64 releases. -RUN arch_bazel=$(echo "$ARCH_GCC" | sed "s/aarch64/arm64/") \ - && curl -fsSL -o /usr/local/bin/bazel https://github.com/bazelbuild/bazel/releases/download/6.3.2/bazel-6.3.2-linux-$arch_bazel \ - && if [[ "$arch_bazel" = arm64 ]]; then echo '9d88a0b206e22cceb4afe0060be7f294b423f5f49b18750fbbd7abd47cea4054 /usr/local/bin/bazel' | sha256sum --check; fi \ - && if [[ "$arch_bazel" = amd64 ]]; then echo 'e78fc3394deae5408d6f49a15c7b1e615901969ecf6e50d55ef899996b0b8458 /usr/local/bin/bazel' | sha256sum --check; fi \ +# Bazelisk is a Bazel runner that reads the version from our .bazelversion file. +ENV BAZELISK_VERSION="1.25.0" + +# Download the binary from the official GitHub releases since the apt repositories do not contain +# arm64 releases. 
+RUN arch_bazel=$(echo "$ARCH_GCC" | sed "s/aarch64/arm64/" | sed "s/x86_64/amd64/" ) \ + && curl -fsSL -o /usr/local/bin/bazel https://github.com/bazelbuild/bazelisk/releases/download/v$BAZELISK_VERSION/bazelisk-linux-$arch_bazel \ + && if [ "$arch_bazel" = arm64 ]; then echo '4c8d966e40ac2c4efcc7f1a5a5cceef2c0a2f16b957e791fa7a867cce31e8fcb  /usr/local/bin/bazel' | sha256sum --check; fi \ + && if [ "$arch_bazel" = amd64 ]; then echo 'fd8fdff418a1758887520fa42da7e6ae39aefc788cf5e7f7bb8db6934d279fc4  /usr/local/bin/bazel' | sha256sum --check; fi \ && chmod +x /usr/local/bin/bazel