@@ -2034,6 +2034,132 @@ void WG_PS_CD(p1u8_p1f32_u32_p1i8)(uint8_t *keys, float *vals, uint32_t n,
20342034 n, scratch, std::greater_equal<uint8_t >{});
20352035}
20362036
2037+ DEVICE_EXTERN_C_INLINE
2038+ void WG_PS_CA (p1i8_p1u8_u32_p1i8)(int8_t *keys, uint8_t *vals, uint32_t n,
2039+ uint8_t *scratch) {
2040+ private_merge_sort_key_value_close (keys, vals, n, scratch,
2041+ std::less_equal<int8_t >{});
2042+ }
2043+
2044+ DEVICE_EXTERN_C_INLINE
2045+ void WG_PS_CD (p1i8_p1u8_u32_p1i8)(int8_t *keys, uint8_t *vals, uint32_t n,
2046+ uint8_t *scratch) {
2047+ private_merge_sort_key_value_close (keys, vals, n, scratch,
2048+ std::greater_equal<int8_t >{});
2049+ }
2050+
2051+ DEVICE_EXTERN_C_INLINE
2052+ void WG_PS_CA (p1i8_p1i8_u32_p1i8)(int8_t *keys, int8_t *vals, uint32_t n,
2053+ uint8_t *scratch) {
2054+ private_merge_sort_key_value_close (keys, reinterpret_cast <uint8_t *>(vals), n,
2055+ scratch, std::less_equal<int8_t >{});
2056+ }
2057+
2058+ DEVICE_EXTERN_C_INLINE
2059+ void WG_PS_CD (p1i8_p1i8_u32_p1i8)(int8_t *keys, int8_t *vals, uint32_t n,
2060+ uint8_t *scratch) {
2061+ private_merge_sort_key_value_close (keys, reinterpret_cast <uint8_t *>(vals), n,
2062+ scratch, std::greater_equal<int8_t >{});
2063+ }
2064+
2065+ DEVICE_EXTERN_C_INLINE
2066+ void WG_PS_CA (p1i8_p1u16_u32_p1i8)(int8_t *keys, uint16_t *vals, uint32_t n,
2067+ uint8_t *scratch) {
2068+ private_merge_sort_key_value_close (keys, vals, n, scratch,
2069+ std::less_equal<int8_t >{});
2070+ }
2071+
2072+ DEVICE_EXTERN_C_INLINE
2073+ void WG_PS_CD (p1i8_p1u16_u32_p1i8)(int8_t *keys, uint16_t *vals, uint32_t n,
2074+ uint8_t *scratch) {
2075+ private_merge_sort_key_value_close (keys, vals, n, scratch,
2076+ std::greater_equal<int8_t >{});
2077+ }
2078+
2079+ DEVICE_EXTERN_C_INLINE
2080+ void WG_PS_CA (p1i8_p1i16_u32_p1i8)(int8_t *keys, int16_t *vals, uint32_t n,
2081+ uint8_t *scratch) {
2082+ private_merge_sort_key_value_close (keys, reinterpret_cast <uint16_t *>(vals),
2083+ n, scratch, std::less_equal<int8_t >{});
2084+ }
2085+
2086+ DEVICE_EXTERN_C_INLINE
2087+ void WG_PS_CD (p1i8_p1i16_u32_p1i8)(int8_t *keys, int16_t *vals, uint32_t n,
2088+ uint8_t *scratch) {
2089+ private_merge_sort_key_value_close (keys, reinterpret_cast <uint16_t *>(vals),
2090+ n, scratch, std::greater_equal<int8_t >{});
2091+ }
2092+
2093+ DEVICE_EXTERN_C_INLINE
2094+ void WG_PS_CA (p1i8_p1u32_u32_p1i8)(int8_t *keys, uint32_t *vals, uint32_t n,
2095+ uint8_t *scratch) {
2096+ private_merge_sort_key_value_close (keys, vals, n, scratch,
2097+ std::less_equal<int8_t >{});
2098+ }
2099+
2100+ DEVICE_EXTERN_C_INLINE
2101+ void WG_PS_CD (p1i8_p1u32_u32_p1i8)(int8_t *keys, uint32_t *vals, uint32_t n,
2102+ uint8_t *scratch) {
2103+ private_merge_sort_key_value_close (keys, vals, n, scratch,
2104+ std::greater_equal<int8_t >{});
2105+ }
2106+
2107+ DEVICE_EXTERN_C_INLINE
2108+ void WG_PS_CA (p1i8_p1i32_u32_p1i8)(int8_t *keys, int32_t *vals, uint32_t n,
2109+ uint8_t *scratch) {
2110+ private_merge_sort_key_value_close (keys, reinterpret_cast <uint32_t *>(vals),
2111+ n, scratch, std::less_equal<int8_t >{});
2112+ }
2113+
2114+ DEVICE_EXTERN_C_INLINE
2115+ void WG_PS_CD (p1i8_p1i32_u32_p1i8)(int8_t *keys, int32_t *vals, uint32_t n,
2116+ uint8_t *scratch) {
2117+ private_merge_sort_key_value_close (keys, reinterpret_cast <uint32_t *>(vals),
2118+ n, scratch, std::greater_equal<int8_t >{});
2119+ }
2120+
2121+ DEVICE_EXTERN_C_INLINE
2122+ void WG_PS_CA (p1i8_p1u64_u32_p1i8)(int8_t *keys, uint64_t *vals, uint32_t n,
2123+ uint8_t *scratch) {
2124+ private_merge_sort_key_value_close (keys, vals, n, scratch,
2125+ std::less_equal<int8_t >{});
2126+ }
2127+
2128+ DEVICE_EXTERN_C_INLINE
2129+ void WG_PS_CD (p1i8_p1u64_u32_p1i8)(int8_t *keys, uint64_t *vals, uint32_t n,
2130+ uint8_t *scratch) {
2131+ private_merge_sort_key_value_close (keys, vals, n, scratch,
2132+ std::greater_equal<int8_t >{});
2133+ }
2134+
2135+ DEVICE_EXTERN_C_INLINE
2136+ void WG_PS_CA (p1i8_p1i64_u32_p1i8)(int8_t *keys, int64_t *vals, uint32_t n,
2137+ uint8_t *scratch) {
2138+ private_merge_sort_key_value_close (keys, reinterpret_cast <uint64_t *>(vals),
2139+ n, scratch, std::less_equal<int8_t >{});
2140+ }
2141+
2142+ DEVICE_EXTERN_C_INLINE
2143+ void WG_PS_CD (p1i8_p1i64_u32_p1i8)(int8_t *keys, int64_t *vals, uint32_t n,
2144+ uint8_t *scratch) {
2145+ private_merge_sort_key_value_close (keys, reinterpret_cast <uint64_t *>(vals),
2146+ n, scratch, std::greater_equal<int8_t >{});
2147+ }
2148+
2149+ DEVICE_EXTERN_C_INLINE
2150+ void WG_PS_CA (p1i8_p1f32_u32_p1i8)(int8_t *keys, float *vals, uint32_t n,
2151+ uint8_t *scratch) {
2152+ private_merge_sort_key_value_close (keys, reinterpret_cast <uint32_t *>(vals),
2153+ n, scratch, std::less_equal<int8_t >{});
2154+ }
2155+
2156+ DEVICE_EXTERN_C_INLINE
2157+ void WG_PS_CD (p1i8_p1f32_u32_p1i8)(int8_t *keys, float *vals, uint32_t n,
2158+ uint8_t *scratch) {
2159+ private_merge_sort_key_value_close (keys, reinterpret_cast <uint32_t *>(vals),
2160+ n, scratch, std::greater_equal<int8_t >{});
2161+ }
2162+
20372163DEVICE_EXTERN_C_INLINE
20382164void WG_PS_CA (p1u32_p1u32_u32_p1i8)(uint32_t *keys, uint32_t *vals, uint32_t n,
20392165 uint8_t *scratch) {
0 commit comments