Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
ef180f7
fix(simd): unmasked AVX2 load
ahuber21 Dec 9, 2025
c1705f5
remove L2Impl specific test
ahuber21 Dec 9, 2025
05dce8f
add asan yml
ahuber21 Dec 9, 2025
ecf5b28
fix naming and clang version
ahuber21 Dec 9, 2025
e5d46aa
copyright
ahuber21 Dec 9, 2025
f271dc9
typo
ahuber21 Dec 9, 2025
2f8dd96
comments on workflow file
ahuber21 Dec 9, 2025
4046e99
chore(ci): streamline asan & linux workflows
ahuber21 Dec 10, 2025
9bc5313
fixup: undo format
ahuber21 Dec 10, 2025
2a43709
refactor: use CATCH_TEMPLATE_TEST_CASE
ahuber21 Dec 10, 2025
1e74a55
fix(ci): remove unused cmake option
ahuber21 Dec 10, 2025
dbe4ae8
fix(ci): skip integration tests in debug asan build
ahuber21 Dec 10, 2025
2b00b78
feat(ci): add new tag 'long' that's skipped for asan
ahuber21 Dec 10, 2025
6cf9ecd
update catch2 and use ADD_TAGS_AS_LABELS
ahuber21 Dec 10, 2025
d94e2a7
add more [long] labels
ahuber21 Dec 10, 2025
3ba1fd8
revert simd_utils.h to trip new asan check in CI
ahuber21 Dec 10, 2025
528ff19
add AVX2 L2 calculation back to trigger asan
ahuber21 Dec 10, 2025
99dbac3
Revert "add AVX2 L2 calculation back to trigger asan"
ahuber21 Dec 10, 2025
055214f
Revert "revert simd_utils.h to trip new asan check in CI"
ahuber21 Dec 10, 2025
478c0dc
run all ISA paths in test
ahuber21 Dec 10, 2025
c205443
keep avx_runtime_flags const by using const_cast in tests
ahuber21 Dec 10, 2025
de0bfac
fix false positive failure for skipped tests
ahuber21 Dec 10, 2025
be64f34
fix: only modify isa dispatching on x86
ahuber21 Dec 10, 2025
f856a96
fixup
ahuber21 Dec 10, 2025
80d1d84
simplify test
ahuber21 Dec 10, 2025
966d58c
Revert "fixup"
ahuber21 Dec 10, 2025
6fcc214
fixup
ahuber21 Dec 10, 2025
7d5b6ed
Include asan in C flags
ahuber21 Dec 10, 2025
63e58cd
fixup
ahuber21 Dec 10, 2025
8b36bef
remove asan_options; remove auto-formatted double-quote change
ahuber21 Dec 11, 2025
4ee907d
Merge branch 'main' into dev/fix-unmasked-read
ahuber21 Dec 11, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions .github/workflows/asan.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Copyright (C) 2025 Intel Corporation
#
# This software and the related documents are Intel copyrighted materials,
# and your use of them is governed by the express license under which they
# were provided to you ("License"). Unless the License provides otherwise,
# you may not use, modify, copy, publish, distribute, disclose or transmit
# this software or the related documents without Intel's prior written
# permission.
#
# This software and the related documents are provided as is, with no
# express or implied warranties, other than those that are expressly stated
# in the License.

# Builds the test suite with clang-18 and AddressSanitizer (Debug) and runs the
# resulting test binary, so that out-of-bounds accesses fail CI.
#
# The workflow name feeds the concurrency group below. A name shared with another
# workflow (e.g. the regular Linux build - confirm its name) would put both in the
# same group and let them cancel each other on the same PR, so keep it unique.
name: ASan Build and Test
run-name: ${{ github.event.inputs.run_name || github.event.pull_request.title }}

on:
  workflow_dispatch:
  pull_request:

# Only the most recent run per workflow and PR/branch is kept alive.
concurrency:
  group: "${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}"
  cancel-in-progress: true

jobs:
  asan-test:
    name: clang-18 fsanitize=address
    runs-on: [self-hosted, Linux, ubuntu-22.04]
    env:
      CXX: clang++-18
      CC: clang-18

    steps:
      # Self-hosted runners keep their workspace between jobs; wipe it first.
      - name: "Cleanup build folder"
        run: |
          ls -la ./
          sudo rm -rf ./* || true
          sudo rm -rf ./.??* || true
          ls -la ./
      - uses: actions/checkout@v4
        with:
          submodules: true

      # Instrument both C and C++ translation units; the linker flag pulls in
      # the ASan runtime for the test executable.
      - name: Configure build
        working-directory: ${{ runner.temp }}
        env:
          TEMP_WORKSPACE: ${{ runner.temp }}
        run: |
          cmake -B${TEMP_WORKSPACE}/build -S${GITHUB_WORKSPACE} \
                -DCMAKE_BUILD_TYPE=Debug \
                -DCMAKE_CXX_FLAGS="-fsanitize=address -fno-omit-frame-pointer -g" \
                -DCMAKE_C_FLAGS="-fsanitize=address -fno-omit-frame-pointer -g" \
                -DCMAKE_EXE_LINKER_FLAGS="-fsanitize=address" \
                -DSVS_BUILD_TESTS=YES \
                -DSVS_BUILD_BINARIES=NO \
                -DSVS_BUILD_EXAMPLES=NO
      - name: Build tests
        working-directory: ${{ runner.temp }}/build
        run: make tests -j

      # Leak detection is switched off; only memory-access errors are checked.
      - name: Run tests
        env:
          CTEST_OUTPUT_ON_FAILURE: 1
          ASAN_OPTIONS: detect_leaks=0
        working-directory: ${{ runner.temp }}/build/tests
        run: ./tests
38 changes: 17 additions & 21 deletions include/svs/core/distance/simd_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#if defined(__i386__) || defined(__x86_64__)

#include <array>
#include <cstring>
#include <limits>
#include <type_traits>

Expand Down Expand Up @@ -332,11 +333,10 @@ template <> struct ConvertToFloat<8> {
// from float
// Unmasked variant: reads a full 256-bit vector (8 floats) starting at `ptr`
// with an unaligned load.
static __m256 load(const float* ptr) { return _mm256_loadu_ps(ptr); }
// Masked load of up to 8 floats starting at `ptr`.
//
// A full-width load followed by a blend still touches all 32 bytes at `ptr`
// and can read past the end of a short buffer (SEGV / ASan report). The load
// itself is therefore masked: _mm256_maskload_ps only accesses lanes whose
// mask element has its sign bit set and returns zero in the other lanes.
//
// NOTE(review): relies on create_blend_mask_avx2 setting the sign bit of every
// selected lane; it was previously fed to _mm256_blendv_ps, which keys on the
// same bit - confirm against its definition.
static __m256 load(mask_t m, const float* ptr) {
    const __m256i lane_mask = _mm256_castps_si256(create_blend_mask_avx2(m));
    return _mm256_maskload_ps(ptr, lane_mask);
}

// from float16
Expand All @@ -345,10 +345,10 @@ template <> struct ConvertToFloat<8> {
}

// Masked load of up to 8 half-precision values, widened to float.
//
// Only the first popcount(m) elements are copied from `ptr` into a zeroed
// 128-bit staging register, so nothing behind the valid elements is read and
// the untouched lanes convert to 0.0f. A direct 128-bit load would read all
// 16 bytes regardless of the mask and can run off the end of the buffer.
//
// NOTE(review): this treats `m` as a contiguous low-bit (prefix) mask with at
// most 8 bits set - confirm that no caller passes a mask with holes.
static __m256 load(mask_t m, const Float16* ptr) {
    __m128i staged = _mm_setzero_si128();
    std::memcpy(&staged, ptr, __builtin_popcount(m) * sizeof(Float16));
    return _mm256_cvtph_ps(staged);
}

// from uint8
Expand All @@ -359,12 +359,10 @@ template <> struct ConvertToFloat<8> {
}

// Masked load of up to 8 unsigned bytes, zero-extended and converted to float.
//
// Only the first popcount(m) bytes are copied from `ptr` into a zeroed 64-bit
// staging value, so nothing behind the valid elements is read and the
// untouched lanes come out as 0.0f. Dereferencing `ptr` as an int64_t instead
// would always read 8 bytes (past the end of a short buffer) and is also an
// unaligned, type-punned access.
//
// NOTE(review): this treats `m` as a contiguous low-bit (prefix) mask with at
// most 8 bits set - confirm that no caller passes a mask with holes.
static __m256 load(mask_t m, const uint8_t* ptr) {
    int64_t staged = 0;
    std::memcpy(&staged, ptr, __builtin_popcount(m) * sizeof(uint8_t));
    const __m256i widened = _mm256_cvtepu8_epi32(_mm_cvtsi64_si128(staged));
    return _mm256_cvtepi32_ps(widened);
}

// from int8
Expand All @@ -375,12 +373,10 @@ template <> struct ConvertToFloat<8> {
}

// Masked load of up to 8 signed bytes, sign-extended and converted to float.
//
// Only the first popcount(m) bytes are copied from `ptr` into a zeroed 64-bit
// staging value, so nothing behind the valid elements is read and the
// untouched lanes come out as 0.0f. Dereferencing `ptr` as an int64_t instead
// would always read 8 bytes (past the end of a short buffer) and is also an
// unaligned, type-punned access.
//
// NOTE(review): this treats `m` as a contiguous low-bit (prefix) mask with at
// most 8 bits set - confirm that no caller passes a mask with holes.
static __m256 load(mask_t m, const int8_t* ptr) {
    int64_t staged = 0;
    std::memcpy(&staged, ptr, __builtin_popcount(m) * sizeof(int8_t));
    const __m256i widened = _mm256_cvtepi8_epi32(_mm_cvtsi64_si128(staged));
    return _mm256_cvtepi32_ps(widened);
}

// We do not need to treat the left or right-hand differently.
Expand Down
58 changes: 58 additions & 0 deletions tests/svs/core/distance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
// catch 2
#include "catch2/catch_test_macros.hpp"

#include <numeric>
#include <vector>

namespace {

std::string_view test_table = R"(
Expand Down Expand Up @@ -94,3 +97,58 @@ CATCH_TEST_CASE("Distance Utils", "[core][distance][distance_type]") {
}
}
}

// Out-of-bounds probe for the L2 kernel, intended to run under AddressSanitizer.
// Both operands are heap vectors asked to drop any spare capacity, so a SIMD
// load that runs past the last element should land in the allocator redzone.
CATCH_TEST_CASE("Distance asan L2", "[distance][simd][asan][l2]") {
    // Every length in [1, 127] is exercised, covering all tail sizes relative
    // to the 8-lane (AVX2) and 16-lane (AVX512) vector widths.
    for (size_t len = 1; len < 128; ++len) {
        std::vector<float> lhs(len);
        std::iota(lhs.begin(), lhs.end(), 0.0f);
        lhs.shrink_to_fit(); // request capacity == size

        std::vector<float> rhs(len);
        std::iota(rhs.begin(), rhs.end(), 1.0f);
        rhs.shrink_to_fit(); // request capacity == size

        auto dist = svs::distance::L2::compute(lhs.data(), rhs.data(), len);
        CATCH_REQUIRE(dist >= 0);
    }
}

// Out-of-bounds probe for the cosine-similarity kernel, intended to run under
// AddressSanitizer. Both operands are heap vectors asked to drop any spare
// capacity, so an over-wide SIMD load should land in the allocator redzone.
CATCH_TEST_CASE("Distance asan Cosine", "[distance][simd][asan][cosine]") {
    // Every length in [1, 127] is exercised, covering all tail sizes.
    for (size_t len = 1; len < 128; ++len) {
        std::vector<float> lhs(len);
        std::iota(lhs.begin(), lhs.end(), 0.0f);
        lhs.shrink_to_fit(); // request capacity == size

        std::vector<float> rhs(len);
        std::iota(rhs.begin(), rhs.end(), 1.0f);
        rhs.shrink_to_fit(); // request capacity == size

        // The third argument (1.0f) is forwarded unchanged to compute().
        auto dist = svs::distance::CosineSimilarity::compute(
            lhs.data(), rhs.data(), 1.0f, len
        );
        CATCH_REQUIRE(dist >= 0);
    }
}

// Out-of-bounds probe for the inner-product kernel, intended to run under
// AddressSanitizer. Both operands are heap vectors asked to drop any spare
// capacity, so an over-wide SIMD load should land in the allocator redzone.
CATCH_TEST_CASE("Distance asan IP", "[distance][simd][asan][ip]") {
    // Every length in [1, 127] is exercised, covering all tail sizes.
    for (size_t len = 1; len < 128; ++len) {
        std::vector<float> lhs(len);
        std::iota(lhs.begin(), lhs.end(), 0.0f);
        lhs.shrink_to_fit(); // request capacity == size

        std::vector<float> rhs(len);
        std::iota(rhs.begin(), rhs.end(), 1.0f);
        rhs.shrink_to_fit(); // request capacity == size

        auto dist = svs::distance::IP::compute(lhs.data(), rhs.data(), len);
        CATCH_REQUIRE(dist >= 0);
    }
}
Loading