diff --git a/.github/workflows/build-linux.yml b/.github/workflows/build-linux.yml index af4adf4d..d520b020 100644 --- a/.github/workflows/build-linux.yml +++ b/.github/workflows/build-linux.yml @@ -30,13 +30,16 @@ concurrency: jobs: build: - name: ${{ matrix.cxx }}, ${{ matrix.build_type }}, ivf=${{ matrix.ivf }} + name: ${{ matrix.cxx }}, ${{ matrix.build_type }}, ivf=${{ matrix.ivf }}, asan=${{ matrix.asan }} runs-on: ubuntu-22.04 strategy: matrix: build_type: [RelWithDebugInfo] ivf: [OFF, ON] cxx: [g++-11, g++-12, clang++-15] + asan: [OFF] + cmake_extra_args: ["-DSVS_BUILD_BINARIES=YES -DSVS_BUILD_EXAMPLES=YES"] + ctest_args: [""] include: - cxx: g++-11 cc: gcc-11 @@ -44,6 +47,21 @@ jobs: cc: gcc-12 - cxx: clang++-15 cc: clang-15 + - cxx: clang++-18 + cc: clang-18 + build_type: Debug + ivf: OFF + asan: ON + # address sanitizer flags + cmake_extra_args: >- + -DCMAKE_CXX_FLAGS='-fsanitize=address -fno-omit-frame-pointer -g' + -DCMAKE_C_FLAGS='-fsanitize=address -fno-omit-frame-pointer -g' + -DCMAKE_EXE_LINKER_FLAGS='-fsanitize=address' + -DCMAKE_SHARED_LINKER_FLAGS='-fsanitize=address' + -DSVS_BUILD_BINARIES=NO + -DSVS_BUILD_EXAMPLES=NO + # skip longer-running tests: -LE excludes tests whose CTest label matches "long", i.e. the [long] Catch2 tag exported as a label by ADD_TAGS_AS_LABELS in tests/CMakeLists.txt + ctest_args: "-LE long" exclude: - cxx: g++-12 ivf: ON @@ -60,6 +78,13 @@ jobs: source /opt/intel/oneapi/setvars.sh printenv >> $GITHUB_ENV + - name: Install Clang 18 + if: matrix.cxx == 'clang++-18' + run: | + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 18 + - name: Configure build working-directory: ${{ runner.temp }} env: @@ -69,12 +94,10 @@ jobs: run: | cmake -B${TEMP_WORKSPACE}/build -S${GITHUB_WORKSPACE} \ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ - -DSVS_BUILD_BINARIES=YES \ -DSVS_BUILD_TESTS=YES \ - -DSVS_BUILD_EXAMPLES=YES \ - -DSVS_EXPERIMENTAL_LEANVEC=YES \ -DSVS_NO_AVX512=NO \ - -DSVS_EXPERIMENTAL_ENABLE_IVF=${{ matrix.ivf }} + -DSVS_EXPERIMENTAL_ENABLE_IVF=${{ matrix.ivf }} \ + ${{ matrix.cmake_extra_args }} - name: Build Tests and Utilities 
working-directory: ${{ runner.temp }}/build @@ -82,12 +105,13 @@ jobs: - name: Run tests env: - CTEST_OUTPUT_ON_FAILURE: 1 + CTEST_OUTPUT_ON_FAILURE: 1 working-directory: ${{ runner.temp }}/build/tests - run: ctest -C ${{ matrix.build_type }} + run: ctest -C ${{ matrix.build_type }} ${{ matrix.ctest_args }} - name: Run Cpp Examples + if: matrix.asan != 'ON' env: - CTEST_OUTPUT_ON_FAILURE: 1 + CTEST_OUTPUT_ON_FAILURE: 1 working-directory: ${{ runner.temp }}/build/examples/cpp - run: ctest -C RelWithDebugInfo + run: ctest -C ${{ matrix.build_type }} diff --git a/include/svs/core/distance/simd_utils.h b/include/svs/core/distance/simd_utils.h index f883abca..bd9834da 100644 --- a/include/svs/core/distance/simd_utils.h +++ b/include/svs/core/distance/simd_utils.h @@ -19,6 +19,7 @@ #if defined(__i386__) || defined(__x86_64__) #include +#include #include #include @@ -332,11 +333,10 @@ template <> struct ConvertToFloat<8> { // from float static __m256 load(const float* ptr) { return _mm256_loadu_ps(ptr); } static __m256 load(mask_t m, const float* ptr) { - // AVX2 doesn't have native masked load, so we load and then blend - auto data = _mm256_loadu_ps(ptr); - auto zero = _mm256_setzero_ps(); - auto mask_vec = create_blend_mask_avx2(m); - return _mm256_blendv_ps(zero, data, mask_vec); + // Full width load with blending may cause out-of-bounds read (SEGV) + // Therefore we use _mm256_maskload_ps which safely handles masked loads + auto mask_vec = _mm256_castps_si256(create_blend_mask_avx2(m)); + return _mm256_maskload_ps(ptr, mask_vec); } // from float16 @@ -345,10 +345,10 @@ template <> struct ConvertToFloat<8> { } static __m256 load(mask_t m, const Float16* ptr) { - auto data = _mm256_cvtph_ps(_mm_loadu_si128(reinterpret_cast(ptr))); - auto zero = _mm256_setzero_ps(); - auto mask_vec = create_blend_mask_avx2(m); - return _mm256_blendv_ps(zero, data, mask_vec); + // Safe masked load using a temporary buffer to avoid SEGV. NOTE(review): copies the first popcount(m) elements into an 8-lane buffer, so this presumably expects m to be a contiguous low-bit mask with at most 8 bits set -- confirm against callers and mask_t + __m128i buffer = _mm_setzero_si128(); + 
std::memcpy(&buffer, ptr, __builtin_popcount(m) * sizeof(Float16)); + return _mm256_cvtph_ps(buffer); } // from uint8 @@ -359,12 +359,10 @@ template <> struct ConvertToFloat<8> { } static __m256 load(mask_t m, const uint8_t* ptr) { - auto data = _mm256_cvtepi32_ps(_mm256_cvtepu8_epi32( - _mm_cvtsi64_si128(*(reinterpret_cast(ptr))) - )); - auto zero = _mm256_setzero_ps(); - auto mask_vec = create_blend_mask_avx2(m); - return _mm256_blendv_ps(zero, data, mask_vec); + // Safe masked load using a temporary buffer to avoid SEGV. NOTE(review): copies the first popcount(m) bytes into an 8-byte buffer, so this presumably expects m to be a contiguous low-bit mask with at most 8 bits set -- confirm against callers and mask_t + int64_t buffer = 0; + std::memcpy(&buffer, ptr, __builtin_popcount(m) * sizeof(uint8_t)); + return _mm256_cvtepi32_ps(_mm256_cvtepu8_epi32(_mm_cvtsi64_si128(buffer))); } // from int8 @@ -375,12 +373,10 @@ template <> struct ConvertToFloat<8> { } static __m256 load(mask_t m, const int8_t* ptr) { - auto data = _mm256_cvtepi32_ps(_mm256_cvtepi8_epi32( - _mm_cvtsi64_si128(*(reinterpret_cast(ptr))) - )); - auto zero = _mm256_setzero_ps(); - auto mask_vec = create_blend_mask_avx2(m); - return _mm256_blendv_ps(zero, data, mask_vec); + // Safe masked load using a temporary buffer to avoid SEGV. NOTE(review): copies the first popcount(m) bytes into an 8-byte buffer, so this presumably expects m to be a contiguous low-bit mask with at most 8 bits set -- confirm against callers and mask_t + int64_t buffer = 0; + std::memcpy(&buffer, ptr, __builtin_popcount(m) * sizeof(int8_t)); + return _mm256_cvtepi32_ps(_mm256_cvtepi8_epi32(_mm_cvtsi64_si128(buffer))); } // We do not need to treat the left or right-hand differently. 
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ad82db1c..63c55a93 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -37,7 +37,7 @@ set(CMAKE_CXX_STANDARD ${SVS_CXX_STANDARD}) FetchContent_Declare( Catch2 GIT_REPOSITORY https://github.com/catchorg/Catch2.git - GIT_TAG v3.4.0 + GIT_TAG v3.11.0 ) FetchContent_MakeAvailable(Catch2) @@ -230,5 +230,4 @@ target_include_directories(tests PRIVATE ${PROJECT_SOURCE_DIR}) list(APPEND CMAKE_MODULE_PATH ${catch2_SOURCE_DIR}/extras) include(CTest) include(Catch) -catch_discover_tests(tests) - +catch_discover_tests(tests ADD_TAGS_AS_LABELS SKIP_IS_FAILURE)
diff --git a/tests/svs/core/distance.cpp b/tests/svs/core/distance.cpp index 3b65c2b9..4f19b733 100644 --- a/tests/svs/core/distance.cpp +++ b/tests/svs/core/distance.cpp @@ -18,8 +18,14 @@ #include "svs/core/distance.h" // catch 2 +#include "catch2/catch_template_test_macros.hpp" #include "catch2/catch_test_macros.hpp" +#include +#include + +#include "svs/lib/avx_detection.h" + namespace { std::string_view test_table = R"(
@@ -94,3 +100,102 @@ CATCH_TEST_CASE("Distance Utils", "[core][distance][distance_type]") {
         }
     }
 }
+
+#ifdef __x86_64__
+namespace {
+
+// Grants temporary write access to a flags object that is exposed as `const` and
+// copies the saved values back when the guard goes out of scope. Restoring from the
+// destructor matters: a failing CATCH_REQUIRE leaves the section by throwing, and a
+// patch that is never undone would change which code paths later tests exercise.
+//
+// NOTE(review): modifying an object that was defined `const` is undefined behaviour.
+// This keeps the same const_cast the AVX detection test uses -- confirm how
+// avx_detection.h defines the flags object.
+template <typename Flags> class ScopedFlagsPatch {
+  public:
+    explicit ScopedFlagsPatch(const Flags& flags)
+        : flags_(const_cast<Flags&>(flags))
+        , saved_(flags) {}
+    ~ScopedFlagsPatch() { flags_ = saved_; }
+
+    ScopedFlagsPatch(const ScopedFlagsPatch&) = delete;
+    ScopedFlagsPatch& operator=(const ScopedFlagsPatch&) = delete;
+
+    // Mutable view of the patched flags.
+    Flags& get() { return flags_; }
+
+  private:
+    Flags& flags_;
+    Flags saved_;
+};
+
+// Lets `ScopedFlagsPatch guard{flags}` deduce the flags type from its argument.
+template <typename Flags> ScopedFlagsPatch(const Flags&) -> ScopedFlagsPatch<Flags>;
+
+} // namespace
+#endif // __x86_64__
+
+CATCH_TEMPLATE_TEST_CASE(
+    "Distance ASan",
+    "[distance][simd][asan]",
+    svs::DistanceL2,
+    svs::DistanceIP,
+    svs::DistanceCosineSimilarity
+) {
+    using Distance = TestType;
+
+    auto run_test = []() {
+        // some full-width AVX2/AVX512 registers plus (crucially) ragged epilogue
+        constexpr size_t size = 64 + 2;
+        std::vector<float> a(size);
+        std::vector<float> b(size);
+
+        std::iota(a.begin(), a.end(), 1.0f);
+        std::iota(b.begin(), b.end(), 2.0f);
+
+        // Ensure no spare capacity
+        a.shrink_to_fit();
+        b.shrink_to_fit();
+
+        auto dist = svs::distance::compute(Distance(), std::span(a), std::span(b));
+        CATCH_REQUIRE(dist >= 0);
+    };
+
+    CATCH_SECTION("Default") { run_test(); }
+
+#ifdef __x86_64__
+    // Applies `disable` to the runtime flags, runs the scenario, and lets the guard
+    // undo the patch on every exit path.
+    auto run_with = [&run_test](auto&& disable) {
+        ScopedFlagsPatch guard{svs::detail::avx_runtime_flags};
+        disable(guard.get());
+        run_test();
+    };
+
+    if (svs::detail::avx_runtime_flags.is_avx512vnni_supported()) {
+        CATCH_SECTION("No AVX512VNNI") {
+            run_with([](auto& flags) { flags.avx512vnni = false; });
+        }
+    }
+
+    if (svs::detail::avx_runtime_flags.is_avx512f_supported()) {
+        CATCH_SECTION("No AVX512F") {
+            run_with([](auto& flags) {
+                flags.avx512vnni = false;
+                flags.avx512f = false;
+            });
+        }
+    }
+
+    if (svs::detail::avx_runtime_flags.is_avx2_supported()) {
+        CATCH_SECTION("No AVX2") {
+            run_with([](auto& flags) {
+                flags.avx512vnni = false;
+                flags.avx512f = false;
+                flags.avx2 = false;
+            });
+        }
+    }
+#endif // __x86_64__
+}
diff --git a/tests/svs/index/inverted/clustering.cpp b/tests/svs/index/inverted/clustering.cpp index 29844f61..6ac89625 100644 --- a/tests/svs/index/inverted/clustering.cpp +++ b/tests/svs/index/inverted/clustering.cpp @@ -385,7 +385,7 @@ void test_end_to_end_clustering( } // namespace -CATCH_TEST_CASE("Random Clustering - End to End", "[inverted][random_clustering]") { +CATCH_TEST_CASE("Random Clustering - End to End", "[long][inverted][random_clustering]") { CATCH_SECTION("Uncompressed Data") { auto data = svs::data::SimpleData::load(test_dataset::data_svs_file()); test_end_to_end_clustering(data, svs::DistanceL2(), 1.2f);
diff --git a/tests/svs/index/inverted/memory_based.cpp b/tests/svs/index/inverted/memory_based.cpp index 60479148..ad7d01b4 100644 --- a/tests/svs/index/inverted/memory_based.cpp +++ b/tests/svs/index/inverted/memory_based.cpp @@ -23,7 +23,7 @@ #include "tests/utils/test_dataset.h" #include -CATCH_TEST_CASE("InvertedIndex Logging Test", "[logging]") { +CATCH_TEST_CASE("InvertedIndex Logging Test", "[long][logging]") { // Vector to store captured log messages std::vector captured_logs; std::vector 
global_captured_logs; diff --git a/tests/svs/index/vamana/index.cpp b/tests/svs/index/vamana/index.cpp index 464b1234..b94b902b 100644 --- a/tests/svs/index/vamana/index.cpp +++ b/tests/svs/index/vamana/index.cpp @@ -181,7 +181,7 @@ CATCH_TEST_CASE("Static VamanaIndex Per-Index Logging", "[logging]") { CATCH_REQUIRE(captured_logs[2].find("Batch Size:") != std::string::npos); } -CATCH_TEST_CASE("Vamana Index Default Parameters", "[parameter][vamana]") { +CATCH_TEST_CASE("Vamana Index Default Parameters", "[long][parameter][vamana]") { using Catch::Approx; std::filesystem::path data_path = test_dataset::data_svs_file(); diff --git a/tests/svs/index/vamana/multi.cpp b/tests/svs/index/vamana/multi.cpp index af52864f..63d450b3 100644 --- a/tests/svs/index/vamana/multi.cpp +++ b/tests/svs/index/vamana/multi.cpp @@ -48,7 +48,7 @@ template float pick_alpha(Distance SVS_UNUSED(dist)) { CATCH_TEMPLATE_TEST_CASE( "Multi-vector dynamic vamana index", - "[index][vamana][multi]", + "[long][index][vamana][multi]", svs::DistanceL2, svs::DistanceIP, svs::DistanceCosineSimilarity diff --git a/tests/svs/lib/avx_detection.cpp b/tests/svs/lib/avx_detection.cpp index 02d5f9e3..6c090efc 100644 --- a/tests/svs/lib/avx_detection.cpp +++ b/tests/svs/lib/avx_detection.cpp @@ -29,4 +29,18 @@ CATCH_TEST_CASE("AVX detection", "[lib][lib-avx-detection]") { << svs::detail::avx_runtime_flags.is_avx512f_supported() << "\n"; std::cout << "AVX512VNNI: " << std::boolalpha << svs::detail::avx_runtime_flags.is_avx512vnni_supported() << "\n"; + +#ifdef __x86_64__ + CATCH_SECTION("Patching") { + auto& mutable_flags = + const_cast(svs::detail::avx_runtime_flags); + auto original = mutable_flags.avx512f; + mutable_flags.avx512f = false; + // Sample the getter and restore the flag before asserting: a failing + // CATCH_REQUIRE would otherwise skip the restore and leave AVX512F disabled. + const bool patched_value = svs::detail::avx_runtime_flags.is_avx512f_supported(); + mutable_flags.avx512f = original; + CATCH_REQUIRE(patched_value == false); + } +#endif }