Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
140 commits
Select commit Hold shift + click to select a range
1171d17
Add arm support
dor-forer Feb 19, 2025
8102ad1
Changed the arm cpu info
dor-forer Feb 20, 2025
0504e08
Add ip test
dor-forer Feb 20, 2025
ba931d0
Add to tests
dor-forer Feb 20, 2025
e0642c8
Added tests andbm
dor-forer Feb 20, 2025
4b8c347
fix tests
dor-forer Feb 20, 2025
3039eb8
Add github benchmakrs
dor-forer Feb 25, 2025
9a67ee8
Check 1
dor-forer Feb 25, 2025
a9b87d4
only arm
dor-forer Feb 25, 2025
da3c880
change ami
dor-forer Feb 25, 2025
1fdb6d5
Try ireland
dor-forer Feb 25, 2025
b4302e1
Try different image
dor-forer Feb 25, 2025
a83947a
try image
dor-forer Feb 25, 2025
a698070
back to old image
dor-forer Feb 25, 2025
730d8ac
larger image
dor-forer Feb 25, 2025
38371c5
Add option to change env
dor-forer Feb 25, 2025
202a89d
back to default region
dor-forer Feb 25, 2025
185703d
Created new image
dor-forer Feb 25, 2025
90e885c
Try to add the x86 to check
dor-forer Feb 25, 2025
d61c358
Try different machine
dor-forer Feb 25, 2025
4a88b1f
added include
dor-forer Feb 25, 2025
3ceadaa
Try without opti on arm
dor-forer Feb 25, 2025
e89762c
Change to c6g
dor-forer Feb 25, 2025
ba1ea86
added matrix region
dor-forer Feb 25, 2025
76b7132
change to west
dor-forer Feb 25, 2025
55bb40f
try the i8
dor-forer Feb 25, 2025
1b84ced
Try oregon
dor-forer Feb 25, 2025
3f98c27
Change subnet id
dor-forer Feb 25, 2025
66d96a1
Now subnet
dor-forer Feb 25, 2025
0c5f16c
Change subnet
dor-forer Feb 25, 2025
b2af693
add subnet
dor-forer Feb 25, 2025
20e596c
Try group id
dor-forer Feb 25, 2025
0682472
Change to vpc id
dor-forer Feb 26, 2025
9be3846
change subnet
dor-forer Feb 26, 2025
125e30b
Change ami
dor-forer Feb 26, 2025
6758753
Try without subnet
dor-forer Feb 26, 2025
2a37fb3
add security group again
dor-forer Feb 26, 2025
7d97821
Change the subnets
dor-forer Feb 26, 2025
97e7249
Change to ids
dor-forer Feb 26, 2025
4545554
Change sg
dor-forer Feb 26, 2025
3a443d3
psubnet
dor-forer Feb 26, 2025
a472150
Try different
dor-forer Feb 26, 2025
bee1c27
different
dor-forer Feb 26, 2025
4a891da
to a file
dor-forer Feb 26, 2025
0341dd7
print
dor-forer Feb 26, 2025
f8f424a
p
dor-forer Feb 26, 2025
ee0458a
leave empty
dor-forer Feb 26, 2025
26ff2cc
empty
dor-forer Feb 26, 2025
d3eaeeb
Try different account
dor-forer Feb 26, 2025
55bc653
Run 2 arm machines
dor-forer Feb 26, 2025
21de162
Move both to us-west-2
dor-forer Feb 26, 2025
6f8e4d4
Try workflow
dor-forer Feb 26, 2025
eedc25c
Change name
dor-forer Feb 26, 2025
578b88d
Changes
dor-forer Feb 26, 2025
41e920f
Change the secrets
dor-forer Feb 27, 2025
6218a9c
Add supprted arch
dor-forer Feb 27, 2025
1533ba7
Add defaults
dor-forer Feb 27, 2025
a86d7ac
Support all
dor-forer Feb 27, 2025
7652c9e
Change the jq
dor-forer Feb 27, 2025
c369125
Change machine to t4g
dor-forer Feb 27, 2025
9d9a047
Change the name
dor-forer Feb 27, 2025
14f8739
Change the machine
dor-forer Feb 27, 2025
2f119ec
fix the stop
dor-forer Feb 27, 2025
96d63af
only benchamrk
dor-forer Mar 2, 2025
305aa0b
add the secrets
dor-forer Mar 2, 2025
4e45109
region secret
dor-forer Mar 2, 2025
1b4649a
benchmark region
dor-forer Mar 2, 2025
797d1d6
Change timeout
dor-forer Mar 3, 2025
db9c63e
Added support for arch name in benchamrks
dor-forer Mar 9, 2025
106fc5e
change th json
dor-forer Mar 9, 2025
a0d62fb
changed to v9.0
dor-forer Mar 9, 2025
b8075b1
Change the check
dor-forer Mar 9, 2025
2007e33
add v9
dor-forer Mar 9, 2025
606cea7
Check alt version of armv9
dor-forer Mar 9, 2025
12bead0
added check
dor-forer Mar 9, 2025
976c366
add arc_arch
dor-forer Mar 9, 2025
8e23a2f
changed to CONCAT_WITH_UNDERSCORE_ARCH
dor-forer Mar 9, 2025
e81ce18
change the check
dor-forer Mar 9, 2025
f8f3d9e
Add full check
dor-forer Mar 9, 2025
f408017
fix the instruct
dor-forer Mar 10, 2025
0af63d8
Added the cmake
dor-forer Mar 10, 2025
38d563a
fix the support
dor-forer Mar 10, 2025
87ac845
put it back to cmake
dor-forer Mar 10, 2025
14bcd59
back
dor-forer Mar 10, 2025
b48d9c4
change the condition
dor-forer Mar 10, 2025
47b9724
No armpl for now
dor-forer Mar 10, 2025
1b35e30
cland format
dor-forer Mar 11, 2025
cafb30c
remove the opt
dor-forer Mar 11, 2025
bde60e4
Changed to one machine
dor-forer Mar 11, 2025
421715c
Added BENCHMARK_ARCH
dor-forer Mar 11, 2025
3c07da6
fix endif
dor-forer Mar 11, 2025
eabe27c
Remove secrets call
dor-forer Mar 12, 2025
7beb70b
pr changes
dor-forer Mar 12, 2025
66b37a6
Changes
dor-forer Mar 12, 2025
768636d
change to compile
dor-forer Mar 12, 2025
01a4f60
add sve
dor-forer Mar 13, 2025
287490f
add #endif
dor-forer Mar 13, 2025
570ab69
add armpl
dor-forer Mar 13, 2025
9ad8c1e
add to cmake
dor-forer Mar 13, 2025
0334e43
remove armpl
dor-forer Mar 13, 2025
15e7963
add install
dor-forer Mar 13, 2025
3750241
Add ARCH=$(uname -m)
dor-forer Mar 13, 2025
22596de
change the path to armpl
dor-forer Mar 13, 2025
69a2f24
suuport check for armv7
dor-forer Mar 13, 2025
f31c2a3
change the armpl
dor-forer Mar 13, 2025
fd6291e
Change or OR
dor-forer Mar 13, 2025
154b2a8
Merge branch 'dorer-add-arm-support' of https://github.com/RedisAI/Ve…
dor-forer Mar 13, 2025
877a70e
add neon supported for spaces
dor-forer Mar 16, 2025
c32ef14
add sve
dor-forer Mar 16, 2025
655a474
add support
dor-forer Mar 16, 2025
4cc47c3
Merge branch 'main' of https://github.com/RedisAI/VectorSimilarity in…
dor-forer Mar 16, 2025
6ae4deb
align
dor-forer Mar 16, 2025
9b09210
format
dor-forer Mar 16, 2025
d3cb7ae
change error
dor-forer Mar 16, 2025
405931d
change
dor-forer Mar 16, 2025
ef9563f
Removed the ifdef
dor-forer Mar 16, 2025
220616b
Add comments
dor-forer Mar 16, 2025
52c5382
clang
dor-forer Mar 16, 2025
e31aa8a
Change names
dor-forer Mar 17, 2025
63ce083
format
dor-forer Mar 17, 2025
c6317bc
PR changes
dor-forer Mar 19, 2025
45e8fdd
Change to 1
dor-forer Mar 19, 2025
f1487b8
fix the l2
dor-forer Mar 19, 2025
8e097c8
fix format
dor-forer Mar 19, 2025
4aeca15
add desciriopn for chunk == 1
dor-forer Mar 19, 2025
4489bf3
remove template armpl
dor-forer Mar 23, 2025
6fa2474
Back to armpl
dor-forer Mar 23, 2025
f2305dc
back to armpl_neon
dor-forer Mar 23, 2025
d567ab2
include
dor-forer Mar 23, 2025
192f8e6
armnpl
dor-forer Mar 23, 2025
87feb67
Revert implemetion chooser
dor-forer Mar 24, 2025
67aa3fc
Revert remove error
dor-forer Mar 24, 2025
5ec219d
Remove comment
dor-forer Mar 24, 2025
8882492
Remove empty line
dor-forer Mar 24, 2025
0310d16
Add support macos
dor-forer Mar 25, 2025
f8fb4d2
add sudo
dor-forer Mar 25, 2025
b72986a
Add absolute path
dor-forer Mar 25, 2025
eacebb2
find all libs
dor-forer Mar 25, 2025
ead05e9
Change folder
dor-forer Mar 25, 2025
e9d0d64
Now set for real
dor-forer Mar 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .install/install_script.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/bin/bash

OS_TYPE=$(uname -s)
ARCH=$(uname -m)

if [[ $OS_TYPE = 'Darwin' ]]
then
Expand All @@ -16,6 +17,23 @@ else
OS=$(echo $OS | sed 's/[/ ]/_/g') # replace spaces and slashes with underscores
fi
echo $OS
# Install Arm Performance Libraries (ArmPL) 24.10 into an `armpl` directory
# under the current working directory.
#   * aarch64 hosts: download the `deb_gcc` tarball and run its installer.
#   * macOS hosts:   download the macOS bundle, mount the disk image and run
#                    the installer shipped inside it.
# Any other host is left untouched.
if [[ $ARCH == 'aarch64' ]]
then
    wget https://developer.arm.com/-/cdn-downloads/permalink/Arm-Performance-Libraries/Version_24.10/arm-performance-libraries_24.10_deb_gcc.tar
    tar -xf arm-performance-libraries_24.10_deb_gcc.tar
    # install libarmpl
    sudo ./arm-performance-libraries_24.10_deb/arm-performance-libraries_24.10_deb.sh --accept --install-to armpl
    # Remove the downloaded archive once the installer has run.
    sudo rm -rf arm-performance-libraries_24.10_deb_gcc.tar
elif [[ $OS == 'macos' ]]
then
    wget https://developer.arm.com/-/cdn-downloads/permalink/Arm-Performance-Libraries/Version_24.10/arm-performance-libraries_24.10_macOS.tgz
    tar zxvf arm-performance-libraries_24.10_macOS.tgz
    hdiutil attach armpl_24.10_flang-new_clang_19.dmg
    # The install prefix is quoted so that a working directory containing
    # spaces is still passed to the installer as a single argument.
    sudo /Volumes/armpl_24.10_flang-new_clang_19_installer/armpl_24.10_flang-new_clang_19_install.sh --install-to="$(pwd)/armpl" -y
    # Clean up
    hdiutil detach /Volumes/armpl_24.10_flang-new_clang_19_installer
    rm -f arm-performance-libraries_24.10_macOS.tgz
fi

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )

Expand Down
7 changes: 4 additions & 3 deletions cmake/aarch64InstructionFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@ message(STATUS "Building for ARM aarch64")
CHECK_CXX_COMPILER_FLAG("-march=armv7-a+neon" CXX_ARMV7_NEON)
CHECK_CXX_COMPILER_FLAG("-march=armv8-a" CXX_ARMV8A)
CHECK_CXX_COMPILER_FLAG("-march=armv8-a+sve" CXX_SVE)
CHECK_CXX_COMPILER_FLAG("-march=armv9-a+sve2" CXX_ARMV9)
CHECK_CXX_COMPILER_FLAG("-march=armv9-a+sve2" CXX_SVE2)


# Enable the SVE2 code path when the compiler accepts -march=armv9-a+sve2 (this is a compiler check only; CPU support is verified at runtime)
if(CXX_ARMV9)
if(CXX_SVE2)
message(STATUS "Using ARMv9.0-a with SVE2 (supported by CPU)")
add_compile_definitions(OPT_ARMV9)
add_compile_definitions(OPT_SVE2)
endif()
if (CXX_ARMV8A OR CXX_ARMV7_NEON)
add_compile_definitions(OPT_NEON)
Expand Down
54 changes: 53 additions & 1 deletion src/VecSim/spaces/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,58 @@ if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)")
endif()
endif()

# ARM hosts: load the instruction-set probes, locate Arm Performance Libraries
# (ArmPL) and register the ArmPL-backed distance-function sources.
if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(aarch64)|(arm64)|(ARM64)|(armv.*)")
    message(STATUS "Enabling Arm Performance Libraries integration")
    include(${root}/cmake/aarch64InstructionFlags.cmake)

    # Probe the known ArmPL 24.10 install locations in priority order:
    # in-tree installs first, then the system-wide /opt/arm installs.
    set(ARMPL_DIR "")
    foreach(armpl_candidate
            "${root}/armpl/armpl_24.10_gcc"
            "${root}/armpl/armpl_24.10_flang-new_clang_19"
            "/opt/arm/armpl_24.10_gcc"
            "/opt/arm/armpl_24.10_flang-new_clang_19")
        if(EXISTS "${armpl_candidate}")
            set(ARMPL_DIR "${armpl_candidate}")
            break()
        endif()
    endforeach()

    if(ARMPL_DIR STREQUAL "")
        # Default fallback path
        set(ARMPL_DIR "${root}/armpl/armpl_24.10_gcc")
        message(WARNING "None of the expected ARM Performance Libraries paths exist. Using default: ${ARMPL_DIR}")
    endif()

    include_directories(${ARMPL_DIR}/include)
    message(STATUS "$ENV{ARMPL_LIBRARIES} and ${ARMPL_DIR}/lib")

    # Locate the ArmPL library. PATHS only adds ${ARMPL_DIR}/lib as an extra
    # search location; CMake's default search locations are still consulted
    # because NO_DEFAULT_PATH is not given.
    find_library(ARMPL_LIB armpl_lp64 PATHS ${ARMPL_DIR}/lib)

    if(NOT ARMPL_LIB)
        # ${ARMPL_LIB} is linked into VectorSimilaritySpaces and the sources
        # registered below include <armpl.h>, so the build cannot succeed
        # without the library. Stop here with an actionable message instead
        # of failing later on a NOTFOUND link item.
        message(FATAL_ERROR "Could not find ARM Performance Libraries (armpl_lp64); looked in ${ARMPL_DIR}/lib and the default library paths")
    else()
        message(STATUS "Found ARM Performance Libraries: ${ARMPL_LIB}")
    endif()

    # Create different optimization implementations for ARM architecture.
    # Each source file is compiled with the -march flag matching the
    # compiler capability detected in aarch64InstructionFlags.cmake.

    # ArmPL used for standard ARMv8-A with NEON
    if (CXX_ARMV8A)
        message("Building with ARMV8A and ArmPL")
        set_source_files_properties(functions/ARMPL_NEON.cpp PROPERTIES COMPILE_FLAGS "-march=armv8-a")
        list(APPEND OPTIMIZATIONS functions/ARMPL_NEON.cpp)
    endif()

    # ArmPL with SVE support
    if (CXX_SVE)
        message("Building with SVE and ArmPL")
        set_source_files_properties(functions/ARMPL_SVE.cpp PROPERTIES COMPILE_FLAGS "-march=armv8-a+sve")
        list(APPEND OPTIMIZATIONS functions/ARMPL_SVE.cpp)
    endif()

    # ArmPL with SVE2 support
    if (CXX_SVE2)
        message("Building with ARMV9 and ArmPL")
        set_source_files_properties(functions/ARMPL_SVE2.cpp PROPERTIES COMPILE_FLAGS "-march=armv9-a+sve2")
        list(APPEND OPTIMIZATIONS functions/ARMPL_SVE2.cpp)
    endif()
endif()

# Here we are compiling the space selectors with the relevant optimization flag.
add_library(VectorSimilaritySpaces
L2_space.cpp
Expand All @@ -90,4 +142,4 @@ add_library(VectorSimilaritySpaces
computer/preprocessor_container.cpp
)

target_link_libraries(VectorSimilaritySpaces VectorSimilaritySpaces_no_optimization cpu_features)
target_link_libraries(VectorSimilaritySpaces VectorSimilaritySpaces_no_optimization cpu_features ${ARMPL_LIB})
17 changes: 17 additions & 0 deletions src/VecSim/spaces/IP/IP_ARMPL_NEON_FP32.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/*
*Copyright Redis Ltd. 2021 - present
*Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
*the Server Side Public License v1 (SSPLv1).
*/

#include "VecSim/spaces/space_includes.h"
#include <armpl.h>

// Inner-product distance between two FP32 vectors, computed with the CBLAS
// single-precision dot product made available through <armpl.h>.
//
// pVect1v / pVect2v: input vectors, each read as `dimension` contiguous floats.
// dimension:         element count of each vector; narrowed to `int` for the
//                    CBLAS call.
// Returns 1.0f minus the dot product of the two vectors.
float FP32_InnerProduct_ARMPL_NEON(const void *pVect1v, const void *pVect2v, size_t dimension) {
    // The inputs are only read, so keep the pointee const instead of casting
    // the qualifier away with a C-style cast.
    const auto *vec1 = static_cast<const float *>(pVect1v);
    const auto *vec2 = static_cast<const float *>(pVect2v);

    // Notice: ArmPL can choose a different implementation based on CPU features.
    const float res = cblas_sdot(static_cast<int>(dimension), vec1, 1, vec2, 1);
    return 1.0f - res;
}
17 changes: 17 additions & 0 deletions src/VecSim/spaces/IP/IP_ARMPL_SVE2_FP32.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/*
*Copyright Redis Ltd. 2021 - present
*Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
*the Server Side Public License v1 (SSPLv1).
*/

#include "VecSim/spaces/space_includes.h"
#include <armpl.h>

// Inner-product distance between two FP32 vectors, computed with the CBLAS
// single-precision dot product made available through <armpl.h>.
//
// pVect1v / pVect2v: input vectors, each read as `dimension` contiguous floats.
// dimension:         element count of each vector; narrowed to `int` for the
//                    CBLAS call.
// Returns 1.0f minus the dot product of the two vectors.
float FP32_InnerProduct_ARMPL_SVE2(const void *pVect1v, const void *pVect2v, size_t dimension) {
    // The inputs are only read, so keep the pointee const instead of casting
    // the qualifier away with a C-style cast.
    const auto *vec1 = static_cast<const float *>(pVect1v);
    const auto *vec2 = static_cast<const float *>(pVect2v);

    // Notice: ArmPL can choose a different implementation based on CPU features.
    const float res = cblas_sdot(static_cast<int>(dimension), vec1, 1, vec2, 1);
    return 1.0f - res;
}
17 changes: 17 additions & 0 deletions src/VecSim/spaces/IP/IP_ARMPL_SVE_FP32.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/*
*Copyright Redis Ltd. 2021 - present
*Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
*the Server Side Public License v1 (SSPLv1).
*/

#include "VecSim/spaces/space_includes.h"
#include <armpl.h>

// Inner-product distance between two FP32 vectors, computed with the CBLAS
// single-precision dot product made available through <armpl.h>.
//
// pVect1v / pVect2v: input vectors, each read as `dimension` contiguous floats.
// dimension:         element count of each vector; narrowed to `int` for the
//                    CBLAS call.
// Returns 1.0f minus the dot product of the two vectors.
float FP32_InnerProduct_ARMPL_SVE(const void *pVect1v, const void *pVect2v, size_t dimension) {
    // The inputs are only read, so keep the pointee const instead of casting
    // the qualifier away with a C-style cast.
    const auto *vec1 = static_cast<const float *>(pVect1v);
    const auto *vec2 = static_cast<const float *>(pVect2v);

    // Notice: ArmPL can choose a different implementation based on CPU features.
    const float res = cblas_sdot(static_cast<int>(dimension), vec1, 1, vec2, 1);
    return 1.0f - res;
}
27 changes: 27 additions & 0 deletions src/VecSim/spaces/IP_space.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
#include "VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h"
#include "VecSim/spaces/functions/AVX2.h"
#include "VecSim/spaces/functions/SSE3.h"
#include "VecSim/spaces/functions/ARMPL_NEON.h"
#include "VecSim/spaces/functions/ARMPL_SVE.h"
#include "VecSim/spaces/functions/ARMPL_SVE2.h"

using bfloat16 = vecsim_types::bfloat16;
using float16 = vecsim_types::float16;
Expand All @@ -35,6 +38,30 @@ dist_func_t<float> IP_FP32_GetDistFunc(size_t dim, unsigned char *alignment, con
if (dim < 16) {
return ret_dist_func;
}

#ifdef CPU_FEATURES_ARCH_AARCH64
auto features = (arch_opt == nullptr)
? cpu_features::GetAarch64Info().features
: *static_cast<const cpu_features::Aarch64Features *>(arch_opt);

#ifdef OPT_SVE2
if (features.sve2) {
return Choose_FP32_IP_implementation_ARMPL_SVE2(dim);
}
#endif
#ifdef OPT_SVE
if (features.sve) {
return Choose_FP32_IP_implementation_ARMPL_SVE(dim);
}
#endif
#ifdef OPT_NEON
if (features.asimd) {
return Choose_FP32_IP_implementation_ARMPL_NEON(dim);
}
#endif

#endif

#ifdef CPU_FEATURES_ARCH_X86_64
auto features = (arch_opt == nullptr)
? cpu_features::GetX86Info().features
Expand Down
49 changes: 49 additions & 0 deletions src/VecSim/spaces/L2/L2_ARMPL_NEON_FP32.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
*Copyright Redis Ltd. 2021 - present
*Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
*the Server Side Public License v1 (SSPLv1).
*/

#include "VecSim/spaces/space_includes.h"
#include <armpl.h>

// Squared L2 distance between two FP32 vectors.
//
// The element-wise difference is materialised in a fixed-size stack buffer,
// one chunk of at most 1024 floats at a time, and each chunk's sum of squares
// is obtained by taking the CBLAS dot product of the buffer with itself.
// Chunks are visited front to back and their partial sums are accumulated in
// that order.
float FP32_L2Sqr_ARMPL_NEON(const void *pVect1v, const void *pVect2v, size_t dimension) {
    const auto *lhs = static_cast<const float *>(pVect1v);
    const auto *rhs = static_cast<const float *>(pVect2v);

    constexpr size_t chunkCapacity = 1024;
    float diff[chunkCapacity];
    float sum = 0.0f;

    size_t start = 0;
    while (start < dimension) {
        // Every chunk is full-sized except possibly the last one.
        const size_t remaining = dimension - start;
        const size_t len = remaining < chunkCapacity ? remaining : chunkCapacity;

        // Difference vector for the current chunk.
        for (size_t k = 0; k < len; ++k) {
            diff[k] = lhs[start + k] - rhs[start + k];
        }

        // dot(diff, diff) is the chunk's sum of squared differences.
        sum += cblas_sdot(len, diff, 1, diff, 1);
        start += len;
    }

    return sum;
}
49 changes: 49 additions & 0 deletions src/VecSim/spaces/L2/L2_ARMPL_SVE2_FP32.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
*Copyright Redis Ltd. 2021 - present
*Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
*the Server Side Public License v1 (SSPLv1).
*/

#include "VecSim/spaces/space_includes.h"
#include "armpl.h"

// Squared L2 distance between two FP32 vectors.
//
// The element-wise difference is materialised in a fixed-size stack buffer,
// one chunk of at most 1024 floats at a time, and each chunk's sum of squares
// is obtained by taking the CBLAS dot product of the buffer with itself.
// Chunks are visited front to back and their partial sums are accumulated in
// that order.
float FP32_L2Sqr_ARMPL_SVE2(const void *pVect1v, const void *pVect2v, size_t dimension) {
    const auto *lhs = static_cast<const float *>(pVect1v);
    const auto *rhs = static_cast<const float *>(pVect2v);

    constexpr size_t chunkCapacity = 1024;
    float diff[chunkCapacity];
    float sum = 0.0f;

    size_t start = 0;
    while (start < dimension) {
        // Every chunk is full-sized except possibly the last one.
        const size_t remaining = dimension - start;
        const size_t len = remaining < chunkCapacity ? remaining : chunkCapacity;

        // Difference vector for the current chunk.
        for (size_t k = 0; k < len; ++k) {
            diff[k] = lhs[start + k] - rhs[start + k];
        }

        // dot(diff, diff) is the chunk's sum of squared differences.
        sum += cblas_sdot(len, diff, 1, diff, 1);
        start += len;
    }

    return sum;
}
49 changes: 49 additions & 0 deletions src/VecSim/spaces/L2/L2_ARMPL_SVE_FP32.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
*Copyright Redis Ltd. 2021 - present
*Licensed under your choice of the Redis Source Available License 2.0 (RSALv2) or
*the Server Side Public License v1 (SSPLv1).
*/

#include "VecSim/spaces/space_includes.h"
#include "armpl.h"

// Squared L2 distance between two FP32 vectors.
//
// The element-wise difference is materialised in a fixed-size stack buffer,
// one chunk of at most 1024 floats at a time, and each chunk's sum of squares
// is obtained by taking the CBLAS dot product of the buffer with itself.
// Chunks are visited front to back and their partial sums are accumulated in
// that order.
float FP32_L2Sqr_ARMPL_SVE(const void *pVect1v, const void *pVect2v, size_t dimension) {
    const auto *lhs = static_cast<const float *>(pVect1v);
    const auto *rhs = static_cast<const float *>(pVect2v);

    constexpr size_t chunkCapacity = 1024;
    float diff[chunkCapacity];
    float sum = 0.0f;

    size_t start = 0;
    while (start < dimension) {
        // Every chunk is full-sized except possibly the last one.
        const size_t remaining = dimension - start;
        const size_t len = remaining < chunkCapacity ? remaining : chunkCapacity;

        // Difference vector for the current chunk.
        for (size_t k = 0; k < len; ++k) {
            diff[k] = lhs[start + k] - rhs[start + k];
        }

        // dot(diff, diff) is the chunk's sum of squared differences.
        sum += cblas_sdot(len, diff, 1, diff, 1);
        start += len;
    }

    return sum;
}
25 changes: 25 additions & 0 deletions src/VecSim/spaces/L2_space.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
#include "VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h"
#include "VecSim/spaces/functions/AVX2.h"
#include "VecSim/spaces/functions/SSE3.h"
#include "VecSim/spaces/functions/ARMPL_NEON.h"
#include "VecSim/spaces/functions/ARMPL_SVE.h"
#include "VecSim/spaces/functions/ARMPL_SVE2.h"

using bfloat16 = vecsim_types::bfloat16;
using float16 = vecsim_types::float16;
Expand All @@ -35,6 +38,28 @@ dist_func_t<float> L2_FP32_GetDistFunc(size_t dim, unsigned char *alignment, con
if (dim < 16) {
return ret_dist_func;
}
#ifdef CPU_FEATURES_ARCH_AARCH64
auto features = (arch_opt == nullptr)
? cpu_features::GetAarch64Info().features
: *static_cast<const cpu_features::Aarch64Features *>(arch_opt);

#ifdef OPT_SVE2
if (features.sve2) {
return Choose_FP32_L2_implementation_ARMPL_SVE2(dim);
}
#endif
#ifdef OPT_SVE
if (features.sve) {
return Choose_FP32_L2_implementation_ARMPL_SVE(dim);
}
#endif
#ifdef OPT_NEON
if (features.asimd) {
return Choose_FP32_L2_implementation_ARMPL_NEON(dim);
}
#endif
#endif

#ifdef CPU_FEATURES_ARCH_X86_64
auto features = (arch_opt == nullptr)
? cpu_features::GetX86Info().features
Expand Down
Loading
Loading