Skip to content

Commit dd38444

Browse files
authored
LoongArch64: add lsx support (#976)
1 parent f28141e commit dd38444

File tree

3 files changed

+137
-0
lines changed

3 files changed

+137
-0
lines changed

include/ada/common_defs.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,10 @@ namespace ada {
243243
#define ADA_NEON 1
244244
#endif
245245

246+
// Define ADA_LSX when the compiler reports __loongarch_sx, so that the
// LSX-specific (LoongArch 128-bit SIMD) code paths guarded by ADA_LSX are
// compiled in.
#if defined(__loongarch_sx)
247+
#define ADA_LSX 1
248+
#endif
249+
246250
#ifndef __has_cpp_attribute
247251
#define ada_lifetime_bound
248252
#elif __has_cpp_attribute(msvc::lifetimebound)

src/helpers.cpp

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,58 @@ ada_really_inline size_t find_next_host_delimiter_special(
303303
}
304304
return size_t(view.length());
305305
}
306+
#elif ADA_LSX
307+
// LSX implementation.
// Returns the index of the first ':', '/', '\\', '?' or '[' located at or
// after `location` in `view`, or view.length() when no such byte is found.
ada_really_inline size_t find_next_host_delimiter_special(
    std::string_view view, size_t location) noexcept {
  const size_t length = view.size();

  // Fewer than 16 bytes remain: scan them one at a time.
  if (length - location < 16) {
    size_t pos = location;
    while (pos < length) {
      const char c = view[pos];
      if (c == ':' || c == '/' || c == '\\' || c == '?' || c == '[') {
        return pos;
      }
      pos++;
    }
    return length;
  }

  // Vector path: one broadcast register per delimiter.
  const __m128i colon = __lsx_vrepli_b(':');
  const __m128i slash = __lsx_vrepli_b('/');
  const __m128i backslash = __lsx_vrepli_b('\\');
  const __m128i question = __lsx_vrepli_b('?');
  const __m128i bracket = __lsx_vrepli_b('[');

  // Loads the 16 bytes at `chunk` and returns a 16-bit mask that is non-zero
  // exactly when one of them is a delimiter; the index of its lowest set bit
  // is used by the callers below as the byte offset within the chunk.
  const auto delimiter_bits = [&](const char* chunk) noexcept -> int {
    const __m128i bytes =
        __lsx_vld(reinterpret_cast<const __m128i*>(chunk), 0);
    const __m128i colon_or_slash =
        __lsx_vor_v(__lsx_vseq_b(bytes, colon), __lsx_vseq_b(bytes, slash));
    const __m128i backslash_or_question = __lsx_vor_v(
        __lsx_vseq_b(bytes, backslash), __lsx_vseq_b(bytes, question));
    const __m128i hits =
        __lsx_vor_v(__lsx_vor_v(colon_or_slash, backslash_or_question),
                    __lsx_vseq_b(bytes, bracket));
    return __lsx_vpickve2gr_hu(__lsx_vmsknz_b(hits), 0);
  };

  // Walk over every full 16-byte chunk starting at `location`.
  size_t offset = location;
  while (offset + 15 < length) {
    const int bits = delimiter_bits(view.data() + offset);
    if (bits != 0) {
      return offset + trailing_zeroes(bits);
    }
    offset += 16;
  }

  // Leftover bytes: re-read the final 16 bytes of the view. Any overlap with
  // the chunks above is harmless since those were found delimiter-free.
  if (offset < length) {
    const size_t tail_start = length - 16;
    const int bits = delimiter_bits(view.data() + tail_start);
    if (bits != 0) {
      return tail_start + trailing_zeroes(bits);
    }
  }
  return length;
}
306358
#else
307359
// : / [ \\ ?
308360
static constexpr std::array<uint8_t, 256> special_host_delimiters =
@@ -436,6 +488,53 @@ ada_really_inline size_t find_next_host_delimiter(std::string_view view,
436488
}
437489
return size_t(view.length());
438490
}
491+
#elif ADA_LSX
492+
// LSX implementation.
// Returns the index of the first ':', '/', '?' or '[' located at or after
// `location` in `view`, or view.length() when no such byte is found.
ada_really_inline size_t find_next_host_delimiter(std::string_view view,
                                                  size_t location) noexcept {
  const size_t length = view.size();

  // Fewer than 16 bytes remain: scan them one at a time.
  if (length - location < 16) {
    size_t pos = location;
    while (pos < length) {
      const char c = view[pos];
      if (c == ':' || c == '/' || c == '?' || c == '[') {
        return pos;
      }
      pos++;
    }
    return length;
  }

  // Vector path: one broadcast register per delimiter.
  const __m128i colon = __lsx_vrepli_b(':');
  const __m128i slash = __lsx_vrepli_b('/');
  const __m128i question = __lsx_vrepli_b('?');
  const __m128i bracket = __lsx_vrepli_b('[');

  // Loads the 16 bytes at `chunk` and returns a 16-bit mask that is non-zero
  // exactly when one of them is a delimiter; the index of its lowest set bit
  // is used by the callers below as the byte offset within the chunk.
  const auto delimiter_bits = [&](const char* chunk) noexcept -> int {
    const __m128i bytes =
        __lsx_vld(reinterpret_cast<const __m128i*>(chunk), 0);
    const __m128i colon_or_slash =
        __lsx_vor_v(__lsx_vseq_b(bytes, colon), __lsx_vseq_b(bytes, slash));
    const __m128i question_or_bracket = __lsx_vor_v(
        __lsx_vseq_b(bytes, question), __lsx_vseq_b(bytes, bracket));
    const __m128i hits = __lsx_vor_v(colon_or_slash, question_or_bracket);
    return __lsx_vpickve2gr_hu(__lsx_vmsknz_b(hits), 0);
  };

  // Walk over every full 16-byte chunk starting at `location`.
  size_t offset = location;
  while (offset + 15 < length) {
    const int bits = delimiter_bits(view.data() + offset);
    if (bits != 0) {
      return offset + trailing_zeroes(bits);
    }
    offset += 16;
  }

  // Leftover bytes: re-read the final 16 bytes of the view. Any overlap with
  // the chunks above is harmless since those were found delimiter-free.
  if (offset < length) {
    const size_t tail_start = length - 16;
    const int bits = delimiter_bits(view.data() + tail_start);
    if (bits != 0) {
      return tail_start + trailing_zeroes(bits);
    }
  }
  return length;
}
439538
#else
440539
// : / [ ?
441540
static constexpr std::array<uint8_t, 256> host_delimiters = []() consteval {

src/unicode.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ ADA_POP_DISABLE_WARNINGS
1313
#include <arm_neon.h>
1414
#elif ADA_SSE2
1515
#include <emmintrin.h>
16+
#elif ADA_LSX
17+
#include <lsxintrin.h>
1618
#endif
1719

1820
#include <ranges>
@@ -121,6 +123,38 @@ ada_really_inline bool has_tabs_or_newline(
121123
}
122124
return _mm_movemask_epi8(running) != 0;
123125
}
126+
#elif ADA_LSX
127+
// LSX implementation.
// Reports whether `user_input` contains a '\r', '\n' or '\t' byte.
ada_really_inline bool has_tabs_or_newline(
    std::string_view user_input) noexcept {
  const size_t length = user_input.size();

  // Inputs shorter than one 16-byte vector are checked byte by byte.
  if (length < 16) {
    return std::ranges::any_of(user_input, is_tabs_or_newline);
  }

  // Vector path: one broadcast register per character of interest.
  const __m128i carriage_return = __lsx_vrepli_b('\r');
  const __m128i line_feed = __lsx_vrepli_b('\n');
  const __m128i tab = __lsx_vrepli_b('\t');

  // OR-accumulator of every comparison result; it stays all-zero as long as
  // none of the inspected bytes matched.
  __m128i found{0};
  const auto accumulate = [&](const char* chunk) noexcept {
    const __m128i bytes =
        __lsx_vld(reinterpret_cast<const __m128i*>(chunk), 0);
    const __m128i line_breaks =
        __lsx_vor_v(__lsx_vseq_b(bytes, carriage_return),
                    __lsx_vseq_b(bytes, line_feed));
    found = __lsx_vor_v(__lsx_vor_v(found, line_breaks),
                        __lsx_vseq_b(bytes, tab));
  };

  // Every full 16-byte chunk from the start of the input.
  size_t offset = 0;
  while (offset + 15 < length) {
    accumulate(user_input.data() + offset);
    offset += 16;
  }

  // Leftover bytes: re-read the final 16 bytes of the input; overlapping
  // bytes are simply OR-ed into the accumulator a second time.
  if (offset < length) {
    accumulate(user_input.data() + length - 16);
  }

  // __lsx_bz_v is non-zero when the whole accumulator is zero (no match).
  return !__lsx_bz_v(found);
}
124158
#else
125159
ada_really_inline bool has_tabs_or_newline(
126160
std::string_view user_input) noexcept {

0 commit comments

Comments
 (0)