diff --git a/bazel-registry/modules/hyperscan/5.4.2.envoy/overlay/BUILD.bazel b/bazel-registry/modules/hyperscan/5.4.2.envoy/overlay/BUILD.bazel
index 0f48950bc0..3455b744e6 100644
--- a/bazel-registry/modules/hyperscan/5.4.2.envoy/overlay/BUILD.bazel
+++ b/bazel-registry/modules/hyperscan/5.4.2.envoy/overlay/BUILD.bazel
@@ -1,6 +1,6 @@
 load("@rules_cc//cc:defs.bzl", "cc_library")
+load(":fat_runtime.bzl", "hs_exec_variant")
 
-# Generate config.h
 genrule(
     name = "config_h",
     outs = ["config.h"],
@@ -24,6 +24,9 @@ genrule(
 #define HS_PATCH_VERSION 2
 #define BUILD_DATE "2023-04-19"
 #define RELEASE_BUILD
+#define FAT_RUNTIME
+#define BUILD_AVX512
+#define BUILD_AVX512VBMI
 
 #endif /* CONFIG_H_ */
 EOF
@@ -100,54 +103,28 @@ cc_library(
     visibility = ["//visibility:private"],
 )
 
-# Runtime library (all C runtime sources)
-# Note: This is a simplified build for x86_64 with -march=native.
-# Hyperscan is a performance-critical library that heavily relies on SIMD
-# instructions (SSE, AVX2, AVX512). For production use, consider building
-# a fat runtime with multiple architecture targets similar to upstream CMake.
+# Export the rename_symbols.sh script
+exports_files(["rename_symbols.sh"])
+
+# Build five architecture variants
+hs_exec_variant("core2", "core2", "-march=core2")
+hs_exec_variant("corei7", "corei7", "-march=corei7")
+hs_exec_variant("avx2", "avx2", "-march=core-avx2")
+hs_exec_variant("avx512", "avx512", "-march=skylake-avx512")
+hs_exec_variant("avx512vbmi", "avx512vbmi", "-march=icelake-server")
+
+# Dispatcher library
 cc_library(
-    name = "hs_runtime",
-    srcs = glob(
-        [
-            "src/crc32.c",
-            "src/database.c",
-            "src/runtime.c",
-            "src/stream_compress.c",
-            "src/hs_version.c",
-            "src/hs_valid_platform.c",
-            "src/fdr/*.c",
-            "src/hwlm/*.c",
-            "src/nfa/*.c",
-            "src/rose/*.c",
-            "src/som/*.c",
-            "src/util/masked_move.c",
-            "src/util/simd_utils.c",
-            "src/util/state_compress.c",
-        ],
-        exclude = [
-            "src/**/*_dump*.c",
-            "src/**/test_*.c",
-            "src/hwlm/noodle_engine_avx2.c",  # Textual include in noodle_engine.c
-            "src/hwlm/noodle_engine_avx512.c",  # Textual include in noodle_engine.c
-            "src/hwlm/noodle_engine_sse.c",  # Textual include in noodle_engine.c
-        ],
-    ),
-    hdrs = glob([
-        "src/**/*.h",
-    ]),
-    textual_hdrs = [
-        "src/hwlm/noodle_engine_avx2.c",
-        "src/hwlm/noodle_engine_avx512.c",
-        "src/hwlm/noodle_engine_sse.c",
-    ],
+    name = "hs_dispatcher",
+    srcs = ["dispatcher.c"],
+    hdrs = glob(["src/**/*.h"]),
     copts = [
         "-std=c99",
         "-O3",
         "-DNDEBUG",
         "-fno-strict-aliasing",
-        "-march=native",  # Required for SIMD optimizations
         "-Wno-unused-parameter",
-        "-Wno-sign-compare",
+        "-Wno-unused-function",
     ],
     includes = [
         ".",
@@ -161,6 +138,32 @@ cc_library(
     deps = [":hs_common"],
 )
 
+# Runtime library combining all variants with dispatcher
+# Note: This implements fat runtime with multiple architecture targets
+# to avoid SIGILL crashes when running on different CPUs.
+cc_library(
+    name = "hs_runtime",
+    hdrs = glob(["src/**/*.h"]),
+    includes = [
+        ".",
+        "src",
+    ],
+    target_compatible_with = [
+        "@platforms//cpu:x86_64",
+        "@platforms//os:linux",
+    ],
+    visibility = ["//visibility:private"],
+    deps = [
+        ":hs_common",
+        ":hs_dispatcher",
+        ":hs_exec_core2_renamed_import",
+        ":hs_exec_corei7_renamed_import",
+        ":hs_exec_avx2_renamed_import",
+        ":hs_exec_avx512_renamed_import",
+        ":hs_exec_avx512vbmi_renamed_import",
+    ],
+)
+
 # Compiler library (all C++ compile sources)
 cc_library(
     name = "hs_compile",
@@ -205,10 +208,13 @@ cc_library(
         "-O2",
         "-DNDEBUG",
         "-fno-strict-aliasing",
+        "-fvisibility=hidden",
         "-Wno-unused-parameter",
         "-Wno-sign-compare",
         "-Wno-unused-variable",
         "-Wno-unused-but-set-variable",
+        "-Wno-unqualified-std-cast-call",
+        "-Wno-redundant-move",
     ],
     includes = [
         ".",
diff --git a/bazel-registry/modules/hyperscan/5.4.2.envoy/overlay/dispatcher.c b/bazel-registry/modules/hyperscan/5.4.2.envoy/overlay/dispatcher.c
new file mode 100644
index 0000000000..9a8afa623f
--- /dev/null
+++ b/bazel-registry/modules/hyperscan/5.4.2.envoy/overlay/dispatcher.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2016-2020, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "hs_common.h"
+#include "hs_runtime.h"
+#include "ue2common.h"
+#include "util/cpuid_inline.h"
+#include "util/join.h"
+
+#if defined(DISABLE_AVX512_DISPATCH)
+#define avx512_ disabled_
+#define check_avx512() (0)
+#endif
+
+#if defined(DISABLE_AVX512VBMI_DISPATCH)
+#define avx512vbmi_ disabled_
+#define check_avx512vbmi() (0)
+#endif
+
+#define CREATE_DISPATCH(RTYPE, NAME, ...)                                      \
+    /* create defns */                                                         \
+    RTYPE JOIN(avx512vbmi_, NAME)(__VA_ARGS__);                                \
+    RTYPE JOIN(avx512_, NAME)(__VA_ARGS__);                                    \
+    RTYPE JOIN(avx2_, NAME)(__VA_ARGS__);                                      \
+    RTYPE JOIN(corei7_, NAME)(__VA_ARGS__);                                    \
+    RTYPE JOIN(core2_, NAME)(__VA_ARGS__);                                     \
+                                                                               \
+    /* error func */                                                           \
+    static inline RTYPE JOIN(error_, NAME)(__VA_ARGS__) {                      \
+        return (RTYPE)HS_ARCH_ERROR;                                           \
+    }                                                                          \
+                                                                               \
+    /* resolver */                                                             \
+    static RTYPE (*JOIN(resolve_, NAME)(void))(__VA_ARGS__) {                  \
+        if (check_avx512vbmi()) {                                              \
+            return JOIN(avx512vbmi_, NAME);                                    \
+        }                                                                      \
+        if (check_avx512()) {                                                  \
+            return JOIN(avx512_, NAME);                                        \
+        }                                                                      \
+        if (check_avx2()) {                                                    \
+            return JOIN(avx2_, NAME);                                          \
+        }                                                                      \
+        if (check_sse42() && check_popcnt()) {                                 \
+            return JOIN(corei7_, NAME);                                        \
+        }                                                                      \
+        if (check_ssse3()) {                                                   \
+            return JOIN(core2_, NAME);                                         \
+        }                                                                      \
+        /* anything else is fail */                                            \
+        return JOIN(error_, NAME);                                             \
+    }                                                                          \
+                                                                               \
+    /* function */                                                             \
+    HS_PUBLIC_API                                                              \
+    RTYPE NAME(__VA_ARGS__) __attribute__((ifunc("resolve_" #NAME)))
+
+CREATE_DISPATCH(hs_error_t, hs_scan, const hs_database_t *db, const char *data,
+                unsigned length, unsigned flags, hs_scratch_t *scratch,
+                match_event_handler onEvent, void *userCtx);
+
+CREATE_DISPATCH(hs_error_t, hs_stream_size, const hs_database_t *database,
+                size_t *stream_size);
+
+CREATE_DISPATCH(hs_error_t, hs_database_size, const hs_database_t *db,
+                size_t *size);
+CREATE_DISPATCH(hs_error_t, dbIsValid, const hs_database_t *db);
+CREATE_DISPATCH(hs_error_t, hs_free_database, hs_database_t *db);
+
+CREATE_DISPATCH(hs_error_t, hs_open_stream, const hs_database_t *db,
+                unsigned int flags, hs_stream_t **stream);
+
+CREATE_DISPATCH(hs_error_t, hs_scan_stream, hs_stream_t *id, const char *data,
+                unsigned int length, unsigned int flags, hs_scratch_t *scratch,
+                match_event_handler onEvent, void *ctxt);
+
+CREATE_DISPATCH(hs_error_t, hs_close_stream, hs_stream_t *id,
+                hs_scratch_t *scratch, match_event_handler onEvent, void *ctxt);
+
+CREATE_DISPATCH(hs_error_t, hs_scan_vector, const hs_database_t *db,
+                const char *const *data, const unsigned int *length,
+                unsigned int count, unsigned int flags, hs_scratch_t *scratch,
+                match_event_handler onevent, void *context);
+
+CREATE_DISPATCH(hs_error_t, hs_database_info, const hs_database_t *db, char **info);
+
+CREATE_DISPATCH(hs_error_t, hs_copy_stream, hs_stream_t **to_id,
+                const hs_stream_t *from_id);
+
+CREATE_DISPATCH(hs_error_t, hs_reset_stream, hs_stream_t *id,
+                unsigned int flags, hs_scratch_t *scratch,
+                match_event_handler onEvent, void *context);
+
+CREATE_DISPATCH(hs_error_t, hs_reset_and_copy_stream, hs_stream_t *to_id,
+                const hs_stream_t *from_id, hs_scratch_t *scratch,
+                match_event_handler onEvent, void *context);
+
+CREATE_DISPATCH(hs_error_t, hs_serialize_database, const hs_database_t *db,
+                char **bytes, size_t *length);
+
+CREATE_DISPATCH(hs_error_t, hs_deserialize_database, const char *bytes,
+                const size_t length, hs_database_t **db);
+
+CREATE_DISPATCH(hs_error_t, hs_deserialize_database_at, const char *bytes,
+                const size_t length, hs_database_t *db);
+
+CREATE_DISPATCH(hs_error_t, hs_serialized_database_info, const char *bytes,
+                size_t length, char **info);
+
+CREATE_DISPATCH(hs_error_t, hs_serialized_database_size, const char *bytes,
+                const size_t length, size_t *deserialized_size);
+
+CREATE_DISPATCH(hs_error_t, hs_compress_stream, const hs_stream_t *stream,
+                char *buf, size_t buf_space, size_t *used_space);
+
+CREATE_DISPATCH(hs_error_t, hs_expand_stream, const hs_database_t *db,
+                hs_stream_t **stream, const char *buf,size_t buf_size);
+
+CREATE_DISPATCH(hs_error_t, hs_reset_and_expand_stream, hs_stream_t *to_stream,
+                const char *buf, size_t buf_size, hs_scratch_t *scratch,
+                match_event_handler onEvent, void *context);
+
+/** INTERNALS **/
+
+CREATE_DISPATCH(u32, Crc32c_ComputeBuf, u32 inCrc32, const void *buf, size_t bufLen);
diff --git a/bazel-registry/modules/hyperscan/5.4.2.envoy/overlay/fat_runtime.bzl b/bazel-registry/modules/hyperscan/5.4.2.envoy/overlay/fat_runtime.bzl
new file mode 100644
index 0000000000..7415be4dac
--- /dev/null
+++ b/bazel-registry/modules/hyperscan/5.4.2.envoy/overlay/fat_runtime.bzl
@@ -0,0 +1,109 @@
+"""Macro for building fat runtime variants of hyperscan."""
+
+load("@rules_cc//cc:defs.bzl", "cc_import", "cc_library")
+
+def hs_exec_variant(name, arch, march_flag):
+    """Build an architecture-specific variant of the runtime library.
+
+    Args:
+        name: Name of the variant (e.g., "core2", "avx2")
+        arch: Architecture identifier for symbol prefixing
+        march_flag: GCC -march flag (e.g., "-march=core2")
+    """
+    lib_name = "hs_exec_" + name
+    renamed_name = lib_name + "_renamed"
+
+    # Compile the runtime sources with specific architecture flags
+    cc_library(
+        name = lib_name,
+        srcs = native.glob(
+            [
+                "src/crc32.c",
+                "src/database.c",
+                "src/runtime.c",
+                "src/stream_compress.c",
+                "src/hs_version.c",
+                "src/hs_valid_platform.c",
+                "src/fdr/*.c",
+                "src/hwlm/*.c",
+                "src/nfa/*.c",
+                "src/rose/*.c",
+                "src/som/*.c",
+                "src/util/masked_move.c",
+                "src/util/simd_utils.c",
+                "src/util/state_compress.c",
+            ],
+            exclude = [
+                "src/**/*_dump*.c",
+                "src/**/test_*.c",
+                "src/hwlm/noodle_engine_avx2.c",
+                "src/hwlm/noodle_engine_avx512.c",
+                "src/hwlm/noodle_engine_sse.c",
+            ],
+        ),
+        hdrs = native.glob(["src/**/*.h"]),
+        textual_hdrs = [
+            "src/hwlm/noodle_engine_avx2.c",
+            "src/hwlm/noodle_engine_avx512.c",
+            "src/hwlm/noodle_engine_sse.c",
+        ],
+        copts = [
+            "-std=c99",
+            "-O3",
+            "-DNDEBUG",
+            "-fno-strict-aliasing",
+            march_flag,
+            "-Wno-unused-parameter",
+            "-Wno-sign-compare",
+        ],
+        includes = [
+            ".",
+            "src",
+        ],
+        linkstatic = True,
+        target_compatible_with = [
+            "@platforms//cpu:x86_64",
+            "@platforms//os:linux",
+        ],
+        visibility = ["//visibility:private"],
+        deps = [":hs_common"],
+    )
+
+    # Extract the static library and rename symbols
+    native.genrule(
+        name = renamed_name,
+        srcs = [":" + lib_name],
+        outs = ["lib" + renamed_name + ".a"],
+        cmd = """
+        # Find the .a file in the inputs
+        for f in $(SRCS); do
+            if [[ "$$f" == *.a ]]; then
+                INPUT_AR="$$f"
+                break
+            fi
+        done
+        if [ -z "$$INPUT_AR" ]; then
+            echo "Error: Could not find .a file in inputs"
+            exit 1
+        fi
+        # Run the symbol renaming script
+        $(location :rename_symbols.sh) """ + arch + """ "$$INPUT_AR" $@
+        """,
+        tools = [":rename_symbols.sh"],
+        target_compatible_with = [
+            "@platforms//cpu:x86_64",
+            "@platforms//os:linux",
+        ],
+    )
+
+    # Import the renamed archive back as a cc_library
+    cc_import(
+        name = renamed_name + "_import",
+        static_library = ":" + renamed_name,
+        target_compatible_with = [
+            "@platforms//cpu:x86_64",
+            "@platforms//os:linux",
+        ],
+        visibility = ["//visibility:private"],
+    )
+
diff --git a/bazel-registry/modules/hyperscan/5.4.2.envoy/overlay/rename_symbols.sh b/bazel-registry/modules/hyperscan/5.4.2.envoy/overlay/rename_symbols.sh
new file mode 100755
index 0000000000..3ea9b36029
--- /dev/null
+++ b/bazel-registry/modules/hyperscan/5.4.2.envoy/overlay/rename_symbols.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# Symbol renaming script for hyperscan fat runtime
+# Usage: rename_symbols.sh <prefix> <input.a> <output.a>
+
+set -e
+
+PREFIX=$1
+INPUT_AR=$(realpath "$2")
+OUTPUT_AR=$(realpath "$3")
+
+# Create temporary directory for work
+TMPDIR=$(mktemp -d)
+trap "rm -rf ${TMPDIR}" EXIT
+
+# Keep symbols (from cmake/keep.syms.in)
+KEEPSYMS="${TMPDIR}/keep.syms"
+cat > "${KEEPSYMS}" << 'EOF'
+hs_misc_alloc
+hs_misc_free
+hs_free_scratch
+hs_stream_alloc
+hs_stream_free
+hs_scratch_alloc
+hs_scratch_free
+hs_database_alloc
+hs_database_free
+^_
+EOF
+
+# Extract archive to temporary directory
+cd "${TMPDIR}"
+ar x "${INPUT_AR}"
+
+# Process each object file
+for obj in *.o; do
+    SYMSFILE="${obj}.syms"
+
+    # Get all global symbols from the object, filter out keep symbols,
+    # and create rename map
+    nm -f p -g "${obj}" | cut -f1 -d' ' | grep -v -f "${KEEPSYMS}" | sed -e "s/\(.*\)/\1 ${PREFIX}_\1/" > "${SYMSFILE}"
+
+    # Rename symbols if any need renaming
+    if [ -s "${SYMSFILE}" ]; then
+        objcopy --redefine-syms="${SYMSFILE}" "${obj}"
+    fi
+
+    rm -f "${SYMSFILE}"
+done
+
+# Create output archive with renamed symbols
+ar rcs "${OUTPUT_AR}" *.o
+
+# Return to original directory
+cd - > /dev/null
diff --git a/bazel-registry/modules/hyperscan/5.4.2.envoy/source.json b/bazel-registry/modules/hyperscan/5.4.2.envoy/source.json
index ebaa1755d7..5d0b05aefc 100644
--- a/bazel-registry/modules/hyperscan/5.4.2.envoy/source.json
+++ b/bazel-registry/modules/hyperscan/5.4.2.envoy/source.json
@@ -7,7 +7,9 @@
     },
     "patch_strip": 1,
     "overlay": {
-        "BUILD.bazel": "sha256-L9XNtFqxLX+QdM/yHJ0j4qCCrY8qN1yuhtMN0yxVz4E=",
-        "MODULE.bazel": "sha256-rhTO03T2Bmq1sVdF5qhFZ74M8CPkFwl8g2qNmHpgTmQ="
+        "BUILD.bazel": "sha256-i7NsaaQxXpWaXsXzjyMgki/tMefqt66V6kqihn/CoY0=",
+        "dispatcher.c": "sha256-QxcWJt0umsfxxzGhoMK6yUAxpNqQWdO+VDdXHcLUlTo=",
+        "fat_runtime.bzl": "sha256-CP3rZhMbOThXZBwGnNEpRomblPef6QtxBko2jkYDxKw=",
+        "rename_symbols.sh": "sha256-T89H3ivs1BvUGZWqyJqbZ3uz1kMxU8ZJKkxAGY3xMFs="
     }
 }