Skip to content

Commit 434cd1f

Browse files
ScottcjnFrancescAlted
authored andcommitted
fix: enable VSX shuffle on ppc64 big-endian with proper detection
- Widen CMake detection from ppc64le-only to all PowerPC variants (ppc64, ppc64le, powerpc, powerpc64, powerpc64le) - Relax shuffle-altivec.c guard from __VSX__ && _ARCH_PWR8 to __VSX__ && !__APPLE__, since the shuffle code only needs VSX (POWER7+), not POWER8. The !__APPLE__ guard avoids GCC Bug #121696 where __VSX__ is falsely defined on powerpc-apple-darwin. - Keep _ARCH_PWR8 requirement in bitshuffle-altivec.c (vec_bperm is a POWER8 ISA 2.07 instruction) - Fix vec_bperm result element index for big-endian: the 16-bit result is in element 0 on BE vs element 4 on LE G4/G5 Macs are unaffected: they lack __VSX__, so they continue to use the generic scalar path as before. Tested: builds and passes full test suite on x86_64. POWER8 S824 (ppc64le) and Power Mac G5 (ppc64be) available for hardware verification. Fixes #702
1 parent 9200990 commit 434cd1f

File tree

3 files changed

+28
-13
lines changed

3 files changed

+28
-13
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,7 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES armv7l|aarch64|arm64)
371371
# Unrecognized compiler. Emit a warning message to let the user know hardware-acceleration won't be available.
372372
message(WARNING "Unable to determine which ${CMAKE_SYSTEM_PROCESSOR} hardware features are supported by the C compiler (${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}).")
373373
endif()
374-
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(ppc64le|powerpc64le)")
374+
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)")
375375
if(CMAKE_C_COMPILER_ID MATCHES GNU AND CMAKE_C_COMPILER_VERSION VERSION_GREATER 8)
376376
set(COMPILER_SUPPORT_ALTIVEC TRUE)
377377
elseif(CMAKE_C_COMPILER_ID MATCHES Clang AND CMAKE_C_COMPILER_VERSION VERSION_GREATER 13)

blosc/bitshuffle-altivec.c

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,20 @@
2626
#include "bitshuffle-generic.h"
2727
#include <stdlib.h>
2828

29-
/* Make sure ALTIVEC is available for the compilation target and compiler. */
30-
#if defined(__ALTIVEC__) && defined(__VSX__) && defined(_ARCH_PWR8)
29+
/* Make sure ALTIVEC is available for the compilation target and compiler.
30+
vec_bperm requires POWER8 (ISA 2.07). VSX is needed for vec_xl/vec_xst.
31+
Exclude __APPLE__ because some GCC versions falsely define __VSX__ on
32+
powerpc-apple-darwin targets (GCC Bug #121696, cf. Eigen's workaround). */
33+
#if defined(__ALTIVEC__) && defined(__VSX__) && defined(_ARCH_PWR8) && !defined(__APPLE__)
34+
35+
/* vec_bperm places its result in different elements depending on byte order.
36+
On big-endian the result is in element 0; on little-endian it is in
37+
element 4 (of a uint16_t vector). */
38+
#if defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
39+
#define BLOSC_BPERM_RESULT_IDX 0
40+
#else
41+
#define BLOSC_BPERM_RESULT_IDX 4
42+
#endif
3143

3244
#include "transpose-altivec.h"
3345

@@ -169,9 +181,9 @@ bitunshuffle1_altivec(void* _src, void* dest, const size_t size, const size_t el
169181
for (kk = 0; kk < 8; kk++) {
170182
__vector uint16_t tmp;
171183
tmp = (__vector uint16_t) vec_bperm(xmm, masks[kk]);
172-
//printf("%d %d\n", vp, tmp[4]);
184+
//printf("%d %d\n", vp, tmp[BLOSC_BPERM_RESULT_IDX]);
173185
//helper_print((__vector uint8_t)tmp, "tmp");
174-
out_s[vp++] = tmp[4];
186+
out_s[vp++] = tmp[BLOSC_BPERM_RESULT_IDX];
175187
}
176188
}
177189
}
@@ -351,7 +363,7 @@ int64_t bshuf_trans_bit_byte_altivec(const void* in, void* out, const size_t siz
351363
uint16_t* oui16;
352364
tmp = (__vector uint16_t) vec_bperm(data, masks[kk]);
353365
oui16 = (uint16_t*)&out_b[(ii + kk*nbyte) >> 3];
354-
*oui16 = tmp[4];
366+
*oui16 = tmp[BLOSC_BPERM_RESULT_IDX];
355367
}
356368
}
357369
count = bshuf_trans_bit_byte_remainder(in, out, size, elem_size,
@@ -564,7 +576,7 @@ int64_t bshuf_shuffle_bit_eightelem_altivec(const void* in, void* out, const siz
564576
uint16_t* oui16;
565577
tmp = (__vector uint16_t) vec_bperm(data, masks[kk]);
566578
oui16 = (uint16_t*)&out_b[ii + (jj>>3) + kk * elem_size];
567-
*oui16 = tmp[4];
579+
*oui16 = tmp[BLOSC_BPERM_RESULT_IDX];
568580
}
569581
}
570582
}
@@ -595,7 +607,7 @@ int64_t bshuf_untrans_bit_elem_altivec(const void* in, void* out, const size_t s
595607

596608
const bool is_bshuf_altivec = true;
597609

598-
#else /* defined(__ALTIVEC__) && defined(__VSX__) && defined(_ARCH_PWR8) */
610+
#else /* VSX + POWER8 path */
599611

600612
const bool is_bshuf_altivec = false;
601613

@@ -611,4 +623,4 @@ bshuf_untrans_bit_elem_altivec(const void* in, void* out, const size_t size,
611623
abort();
612624
}
613625

614-
#endif /* defined(__ALTIVEC__) && defined(__VSX__) && defined(_ARCH_PWR8) */
626+
#endif /* defined(__ALTIVEC__) && defined(__VSX__) && defined(_ARCH_PWR8) && !defined(__APPLE__) */

blosc/shuffle-altivec.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,11 @@
1212
#include "shuffle-generic.h"
1313
#include <stdlib.h>
1414

15-
/* Make sure ALTIVEC is available for the compilation target and compiler. */
16-
#if defined(__ALTIVEC__) && defined(__VSX__) && defined(_ARCH_PWR8)
15+
/* Make sure ALTIVEC is available for the compilation target and compiler.
16+
VSX (POWER7+) is required for vec_xl/vec_xst (unaligned loads/stores).
17+
Exclude __APPLE__ because some GCC versions falsely define __VSX__ on
18+
powerpc-apple-darwin targets (GCC Bug #121696, cf. Eigen's workaround). */
19+
#if defined(__ALTIVEC__) && defined(__VSX__) && !defined(__APPLE__)
1720

1821
#include "transpose-altivec.h"
1922

@@ -426,7 +429,7 @@ unshuffle_altivec(const int32_t bytesoftype, const int32_t blocksize,
426429

427430
const bool is_shuffle_altivec = true;
428431

429-
#else /* defined(__ALTIVEC__) && defined(__VSX__) && defined(_ARCH_PWR8) */
432+
#else /* defined(__ALTIVEC__) && defined(__VSX__) && !defined(__APPLE__) */
430433

431434
const bool is_shuffle_altivec = false;
432435

@@ -440,4 +443,4 @@ void unshuffle_altivec(const int32_t bytesoftype, const int32_t blocksize,
440443
abort();
441444
}
442445

443-
#endif /* defined(__ALTIVEC__) && defined(__VSX__) && defined(_ARCH_PWR8) */
446+
#endif /* defined(__ALTIVEC__) && defined(__VSX__) && !defined(__APPLE__) */

0 commit comments

Comments
 (0)