@@ -384,14 +384,6 @@ template <typename Traits, typename Uint>
384384using KernelTraitsWithUnpack =
385385 KernelTraits<Uint, Traits::kShape .packed_bit_size(), Traits::kShape .simd_bit_size()>;
386386
387- template <typename Traits>
388- using KernelTraitsHalf =
389- KernelTraitsWithUnpack<Traits, SizedUint<Traits::kShape .unpacked_byte_size() / 2 >>;
390-
391- template <typename Traits>
392- using KernelTraitsDouble =
393- KernelTraitsWithUnpack<Traits, SizedUint<Traits::kShape .unpacked_byte_size() * 2 >>;
394-
395387/* *****************
396388 * MediumKernel *
397389 ******************/
@@ -845,13 +837,24 @@ struct KernelDispatch<Traits, std::enable_if_t<Traits::kShape.is_medium() &&
845837template <typename Traits>
846838struct KernelDispatch <
847839 Traits, std::enable_if_t <Traits::kShape .is_medium() && MediumShouldUseUint32<Traits>>>
848- : ForwardToKernel<Traits, MediumKernel<KernelTraitsHalf<Traits>>> {};
840+ : ForwardToKernel<Traits, MediumKernel<KernelTraitsWithUnpack<Traits, uint32_t >>> {};
841+
842+ // Benchmarking shows large unpack to uint8_t is underperforming on SSE4.2
843+ template <typename Traits, typename Arch = typename Traits::arch_type>
844+ constexpr bool LargeShouldUseUint16 = HasSse2<Arch> &&
845+ (Traits::kShape .unpacked_byte_size() ==
846+ sizeof (uint8_t ));
849847
850- // Large kernel
851848template <typename Traits>
852- struct KernelDispatch <Traits, std::enable_if_t <Traits::kShape .is_large()>>
849+ struct KernelDispatch <
850+ Traits, std::enable_if_t <Traits::kShape .is_large() && !LargeShouldUseUint16<Traits>>>
853851 : LargeKernel<Traits> {};
854852
853+ template <typename Traits>
854+ struct KernelDispatch <
855+ Traits, std::enable_if_t <Traits::kShape .is_large() && LargeShouldUseUint16<Traits>>>
856+ : ForwardToKernel<Traits, MediumKernel<KernelTraitsWithUnpack<Traits, uint16_t >>> {};
857+
855858// Oversize kernel is only a few edge cases
856859template <typename Traits>
857860struct KernelDispatch <Traits, std::enable_if_t <Traits::kShape .is_oversized()>>
0 commit comments