1
- /* auto-generated on 2023-05-09 17:25:59 -0400. Do not edit! */
1
+ /* auto-generated on 2023-05-16 13:48:47 -0400. Do not edit! */
2
2
/* begin file src/ada.cpp */
3
3
#include "ada.h"
4
4
/* begin file src/checkers.cpp */
@@ -9786,6 +9786,11 @@ std::string to_unicode(std::string_view input) {
9786
9786
ADA_POP_DISABLE_WARNINGS
9787
9787
9788
9788
#include <algorithm>
9789
+ #if ADA_NEON
9790
+ #include <arm_neon.h>
9791
+ #elif ADA_SSE2
9792
+ #include <emmintrin.h>
9793
+ #endif
9789
9794
9790
9795
namespace ada::unicode {
9791
9796
@@ -9817,8 +9822,58 @@ constexpr bool to_lower_ascii(char* input, size_t length) noexcept {
9817
9822
}
9818
9823
return non_ascii == 0;
9819
9824
}
9820
-
9821
- ada_really_inline constexpr bool has_tabs_or_newline(
9825
+ #if ADA_NEON
9826
// NEON implementation of has_tabs_or_newline.
//
// Returns true when user_input contains at least one carriage return ('\r'),
// line feed ('\n') or horizontal tab ('\t') byte, false otherwise (including
// for an empty input).
//
// The input is scanned 16 bytes at a time. For each block, the three byte-wise
// equality masks are OR-ed into `running`, so `running` ends up non-zero if and
// only if some byte matched.
ada_really_inline bool has_tabs_or_newline(
    std::string_view user_input) noexcept {
  size_t i = 0;
  const uint8x16_t mask1 = vmovq_n_u8('\r');
  const uint8x16_t mask2 = vmovq_n_u8('\n');
  const uint8x16_t mask3 = vmovq_n_u8('\t');
  uint8x16_t running = vmovq_n_u8(0);
  // Full 16-byte blocks can be loaded straight from the input.
  for (; i + 15 < user_input.size(); i += 16) {
    const uint8x16_t word =
        vld1q_u8(reinterpret_cast<const uint8_t*>(user_input.data()) + i);
    running = vorrq_u8(vorrq_u8(running, vorrq_u8(vceqq_u8(word, mask1),
                                                  vceqq_u8(word, mask2))),
                       vceqq_u8(word, mask3));
  }
  if (i < user_input.size()) {
    // Fewer than 16 bytes remain: copy them into a zero-filled scratch buffer
    // and load from that buffer, so that we never read past the end of the
    // input. The zero padding cannot match any of the three characters.
    uint8_t buffer[16]{};
    memcpy(buffer, user_input.data() + i, user_input.size() - i);
    const uint8x16_t word = vld1q_u8(buffer);
    running = vorrq_u8(vorrq_u8(running, vorrq_u8(vceqq_u8(word, mask1),
                                                  vceqq_u8(word, mask2))),
                       vceqq_u8(word, mask3));
  }
  // Horizontal maximum across the 16 lanes: non-zero iff any lane matched.
  return vmaxvq_u8(running) != 0;
}
9849
+ #elif ADA_SSE2
9850
+ ada_really_inline bool has_tabs_or_newline(
9851
+ std::string_view user_input) noexcept {
9852
+ size_t i = 0;
9853
+ const __m128i mask1 = _mm_set1_epi8('\r');
9854
+ const __m128i mask2 = _mm_set1_epi8('\n');
9855
+ const __m128i mask3 = _mm_set1_epi8('\t');
9856
+ __m128i running{0};
9857
+ for (; i + 15 < user_input.size(); i += 16) {
9858
+ __m128i word = _mm_loadu_si128((const __m128i*)(user_input.data() + i));
9859
+ running = _mm_or_si128(
9860
+ _mm_or_si128(running, _mm_or_si128(_mm_cmpeq_epi8(word, mask1),
9861
+ _mm_cmpeq_epi8(word, mask2))),
9862
+ _mm_cmpeq_epi8(word, mask3));
9863
+ }
9864
+ if (i < user_input.size()) {
9865
+ uint8_t buffer[16]{};
9866
+ memcpy(buffer, user_input.data() + i, user_input.size() - i);
9867
+ __m128i word = _mm_loadu_si128((const __m128i*)buffer);
9868
+ running = _mm_or_si128(
9869
+ _mm_or_si128(running, _mm_or_si128(_mm_cmpeq_epi8(word, mask1),
9870
+ _mm_cmpeq_epi8(word, mask2))),
9871
+ _mm_cmpeq_epi8(word, mask3));
9872
+ }
9873
+ return _mm_movemask_epi8(running) != 0;
9874
+ }
9875
+ #else
9876
+ ada_really_inline bool has_tabs_or_newline(
9822
9877
std::string_view user_input) noexcept {
9823
9878
auto has_zero_byte = [](uint64_t v) {
9824
9879
return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080);
@@ -9849,6 +9904,7 @@ ada_really_inline constexpr bool has_tabs_or_newline(
9849
9904
}
9850
9905
return running;
9851
9906
}
9907
+ #endif
9852
9908
9853
9909
// A forbidden host code point is U+0000 NULL, U+0009 TAB, U+000A LF, U+000D CR,
9854
9910
// U+0020 SPACE, U+0023 (#), U+002F (/), U+003A (:), U+003C (<), U+003E (>),
@@ -13732,8 +13788,11 @@ bool url_aggregator::set_hostname(const std::string_view input) {
13732
13788
13733
13789
[[nodiscard]] std::string_view url_aggregator::get_host() const noexcept {
13734
13790
ada_log("url_aggregator::get_host");
13791
+ // Technically, we should check if there is a hostname, but
13792
+ // the code below works even if there isn't.
13793
+ // if(!has_hostname()) { return ""; }
13735
13794
size_t start = components.host_start;
13736
- if (buffer.size() > components.host_start &&
13795
+ if (components.host_end > components.host_start &&
13737
13796
buffer[components.host_start] == '@') {
13738
13797
start++;
13739
13798
}
@@ -13747,9 +13806,12 @@ bool url_aggregator::set_hostname(const std::string_view input) {
13747
13806
13748
13807
[[nodiscard]] std::string_view url_aggregator::get_hostname() const noexcept {
13749
13808
ada_log("url_aggregator::get_hostname");
13809
+ // Technically, we should check if there is a hostname, but
13810
+ // the code below works even if there isn't.
13811
+ // if(!has_hostname()) { return ""; }
13750
13812
size_t start = components.host_start;
13751
13813
// So host_start is not where the host begins.
13752
- if (buffer.size() > components.host_start &&
13814
+ if (components.host_end > components.host_start &&
13753
13815
buffer[components.host_start] == '@') {
13754
13816
start++;
13755
13817
}
0 commit comments