Skip to content

Commit 4cfe39f

Browse files
committed
Forward large unpack8 to unpack16 on SSE2
1 parent 4f3fc49 commit 4cfe39f

File tree

1 file changed

+14
-11
lines changed

1 file changed

+14
-11
lines changed

cpp/src/arrow/util/bpacking_simd_impl_internal.h

Lines changed: 14 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -384,14 +384,6 @@ template <typename Traits, typename Uint>
384384
using KernelTraitsWithUnpack =
385385
KernelTraits<Uint, Traits::kShape.packed_bit_size(), Traits::kShape.simd_bit_size()>;
386386

387-
template <typename Traits>
388-
using KernelTraitsHalf =
389-
KernelTraitsWithUnpack<Traits, SizedUint<Traits::kShape.unpacked_byte_size() / 2>>;
390-
391-
template <typename Traits>
392-
using KernelTraitsDouble =
393-
KernelTraitsWithUnpack<Traits, SizedUint<Traits::kShape.unpacked_byte_size() * 2>>;
394-
395387
/******************
396388
* MediumKernel *
397389
******************/
@@ -845,13 +837,24 @@ struct KernelDispatch<Traits, std::enable_if_t<Traits::kShape.is_medium() &&
845837
template <typename Traits>
846838
struct KernelDispatch<
847839
Traits, std::enable_if_t<Traits::kShape.is_medium() && MediumShouldUseUint32<Traits>>>
848-
: ForwardToKernel<Traits, MediumKernel<KernelTraitsHalf<Traits>>> {};
840+
: ForwardToKernel<Traits, MediumKernel<KernelTraitsWithUnpack<Traits, uint32_t>>> {};
841+
842+
// Benchmarking shows that large unpack to uint8_t underperforms on SSE4.2
843+
template <typename Traits, typename Arch = typename Traits::arch_type>
844+
constexpr bool LargeShouldUseUint16 = HasSse2<Arch> &&
845+
(Traits::kShape.unpacked_byte_size() ==
846+
sizeof(uint8_t));
849847

850-
// Large kernel
851848
template <typename Traits>
852-
struct KernelDispatch<Traits, std::enable_if_t<Traits::kShape.is_large()>>
849+
struct KernelDispatch<
850+
Traits, std::enable_if_t<Traits::kShape.is_large() && !LargeShouldUseUint16<Traits>>>
853851
: LargeKernel<Traits> {};
854852

853+
template <typename Traits>
854+
struct KernelDispatch<
855+
Traits, std::enable_if_t<Traits::kShape.is_large() && LargeShouldUseUint16<Traits>>>
856+
: ForwardToKernel<Traits, MediumKernel<KernelTraitsWithUnpack<Traits, uint16_t>>> {};
857+
855858
// Oversize kernel is only a few edge cases
856859
template <typename Traits>
857860
struct KernelDispatch<Traits, std::enable_if_t<Traits::kShape.is_oversized()>>

0 commit comments

Comments
 (0)