Skip to content

Commit 697b275

Browse files
y3tsengy3tseng
authored and committed
add macro to avoid simd on ARM
1 parent b61eb9c commit 697b275

File tree

2 files changed

+13
-5
lines changed

2 files changed

+13
-5
lines changed

CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,11 @@ configure_file(src/version.hpp.in ${CMAKE_SOURCE_DIR}/src/version.hpp)
2121

2222
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/bin)
2323

24+
if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64")
25+
message(STATUS "Building TALCO with x86 SIMD")
26+
add_compile_definitions(TALCO_SIMD)
27+
endif()
28+
2429
if(USE_CUDA)
2530
message(STATUS "Build with CUDA")
2631
check_language(CUDA)

src/TALCO-XDrop.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@
2626
#include "TALCO-XDrop.hpp"
2727
#endif
2828

29+
#if defined(TALCO_SIMD)
2930
#include <immintrin.h>
31+
#endif
3032

3133
#define I_BOUNDARY -2
3234
#define D_BOUNDARY -3
@@ -372,6 +374,7 @@ void Talco_xdrop::Tile (
372374
const float* refColumns = reference[reference_idx + j].data();
373375
const float* qryColumns = query[query_idx + i].data();
374376
if (type == 0) {
377+
#if defined(TALCO_SIMD)
375378
__m256i mask = _mm256_setr_epi32(-1, -1, -1, -1, -1, 0, 0, 0); // first 5 valid
376379
for (int l = 0; l < 5; ++l) {
377380
__m256 sumvec = _mm256_setzero_ps();
@@ -390,18 +393,19 @@ void Talco_xdrop::Tile (
390393
}
391394
for (int l = 0; l < 5; ++l) numerator += refColumns[l] * qryColumns[5] * param->gapCharScore;
392395
for (int m = 0; m < 5; ++m) numerator += refColumns[5] * qryColumns[m] * param->gapCharScore;
393-
/*
396+
#else
394397
for (int l = 0; l < 6; ++l) {
395398
for (int m = 0; m < 6; ++m) {
396399
if (m == 5 && l == 5) numerator += 0;
397400
else if (m == 5 || l == 5) numerator += reference[reference_idx+j][l]*query[query_idx+i][m]*param->gapCharScore;
398401
else numerator += reference[reference_idx+j][l]*query[query_idx+i][m]*param->scoreMatrix[m][l];
399402
}
400403
}
401-
*/
404+
#endif
402405

403406
}
404407
else {
408+
#if defined(TALCO_SIMD)
405409
for (int l = 0; l < 21; ++l) { // skip 21 for now (gap row)
406410
__m256 sumvec = _mm256_setzero_ps();
407411
float ref_l = refColumns[l];
@@ -427,16 +431,15 @@ void Talco_xdrop::Tile (
427431
// Handle gap row/column (21)
428432
for (int l = 0; l < 21; ++l) numerator += refColumns[l] * qryColumns[21] * param->gapCharScore;
429433
for (int m = 0; m < 21; ++m) numerator += refColumns[21] * qryColumns[m] * param->gapCharScore;
430-
431-
/*
434+
#else
432435
for (int l = 0; l < 22; ++l) {
433436
for (int m = 0; m < 22; ++m) {
434437
if (m == 21 && l == 21) numerator += 0;
435438
else if (m == 21 || l == 21) numerator += reference[reference_idx+j][l]*query[query_idx+i][m]*gapExtend;
436439
else numerator += reference[reference_idx+j][l]*query[query_idx+i][m]*param->scoreMatrix[m][l];
437440
}
438441
}
439-
*/
442+
#endif
440443
}
441444
similarScore = numerator/denominator;
442445
if (tile == 0 && (i == 0 || j == 0 )) {

0 commit comments

Comments
 (0)