Skip to content

Commit 7cca814

Browse files
committed
chore: update ada to 2.4.1
1 parent 7287db6 commit 7cca814

File tree

3 files changed

+91
-15
lines changed

3 files changed

+91
-15
lines changed

deps/ada.cpp

Lines changed: 67 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* auto-generated on 2023-05-09 17:25:59 -0400. Do not edit! */
1+
/* auto-generated on 2023-05-16 13:48:47 -0400. Do not edit! */
22
/* begin file src/ada.cpp */
33
#include "ada.h"
44
/* begin file src/checkers.cpp */
@@ -9786,6 +9786,11 @@ std::string to_unicode(std::string_view input) {
97869786
ADA_POP_DISABLE_WARNINGS
97879787

97889788
#include <algorithm>
9789+
#if ADA_NEON
9790+
#include <arm_neon.h>
9791+
#elif ADA_SSE2
9792+
#include <emmintrin.h>
9793+
#endif
97899794

97909795
namespace ada::unicode {
97919796

@@ -9817,8 +9822,58 @@ constexpr bool to_lower_ascii(char* input, size_t length) noexcept {
98179822
}
98189823
return non_ascii == 0;
98199824
}
9820-
9821-
ada_really_inline constexpr bool has_tabs_or_newline(
9825+
#if ADA_NEON
9826+
// NEON implementation: reports whether user_input contains at least one
// carriage return ('\r'), line feed ('\n') or tab ('\t') byte.
//
// The input is scanned 16 bytes at a time. Each block is compared against the
// three target characters and the per-byte match masks are OR-ed into
// `running`; the result is true when any lane of `running` ends up non-zero.
ada_really_inline bool has_tabs_or_newline(
    std::string_view user_input) noexcept {
  size_t i = 0;
  const uint8x16_t mask1 = vmovq_n_u8('\r');
  const uint8x16_t mask2 = vmovq_n_u8('\n');
  const uint8x16_t mask3 = vmovq_n_u8('\t');
  uint8x16_t running = vmovq_n_u8(0);
  // Full 16-byte blocks are loaded straight from the input.
  for (; i + 15 < user_input.size(); i += 16) {
    const uint8x16_t word =
        vld1q_u8(reinterpret_cast<const uint8_t*>(user_input.data()) + i);
    running = vorrq_u8(vorrq_u8(running, vorrq_u8(vceqq_u8(word, mask1),
                                                  vceqq_u8(word, mask2))),
                       vceqq_u8(word, mask3));
  }
  if (i < user_input.size()) {
    // Fewer than 16 bytes remain: copy them into a zero-filled scratch block
    // and load from that block, so that no byte beyond the end of the input
    // is ever read. The zero padding cannot match '\r', '\n' or '\t'.
    uint8_t buffer[16]{};
    memcpy(buffer, user_input.data() + i, user_input.size() - i);
    const uint8x16_t word = vld1q_u8(buffer);
    running = vorrq_u8(vorrq_u8(running, vorrq_u8(vceqq_u8(word, mask1),
                                                  vceqq_u8(word, mask2))),
                       vceqq_u8(word, mask3));
  }
  // Horizontal maximum across the 16 lanes: non-zero iff some byte matched.
  return vmaxvq_u8(running) != 0;
}
9849+
#elif ADA_SSE2
9850+
// SSE2 implementation: reports whether user_input contains at least one
// carriage return ('\r'), line feed ('\n') or tab ('\t') byte.
ada_really_inline bool has_tabs_or_newline(
    std::string_view user_input) noexcept {
  const __m128i cr = _mm_set1_epi8('\r');
  const __m128i lf = _mm_set1_epi8('\n');
  const __m128i tab = _mm_set1_epi8('\t');

  // OR the per-byte equality masks of `chunk` against the three target
  // characters into the accumulator `acc`.
  const auto fold = [&](__m128i acc, __m128i chunk) {
    const __m128i cr_or_lf =
        _mm_or_si128(_mm_cmpeq_epi8(chunk, cr), _mm_cmpeq_epi8(chunk, lf));
    return _mm_or_si128(_mm_or_si128(acc, cr_or_lf),
                        _mm_cmpeq_epi8(chunk, tab));
  };

  __m128i hits{0};
  size_t pos = 0;

  // Full 16-byte blocks, read with an unaligned load.
  while (pos + 15 < user_input.size()) {
    hits = fold(hits,
                _mm_loadu_si128((const __m128i*)(user_input.data() + pos)));
    pos += 16;
  }

  // Remaining bytes (fewer than 16) are copied into a zero-filled scratch
  // block first, and the block is what gets loaded.
  if (pos < user_input.size()) {
    uint8_t tail[16]{};
    memcpy(tail, user_input.data() + pos, user_input.size() - pos);
    hits = fold(hits, _mm_loadu_si128((const __m128i*)tail));
  }

  // The movemask gathers the top bit of every byte; it is non-zero iff some
  // comparison matched.
  return _mm_movemask_epi8(hits) != 0;
}
9875+
#else
9876+
ada_really_inline bool has_tabs_or_newline(
98229877
std::string_view user_input) noexcept {
98239878
auto has_zero_byte = [](uint64_t v) {
98249879
return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080);
@@ -9849,6 +9904,7 @@ ada_really_inline constexpr bool has_tabs_or_newline(
98499904
}
98509905
return running;
98519906
}
9907+
#endif
98529908

98539909
// A forbidden host code point is U+0000 NULL, U+0009 TAB, U+000A LF, U+000D CR,
98549910
// U+0020 SPACE, U+0023 (#), U+002F (/), U+003A (:), U+003C (<), U+003E (>),
@@ -13732,8 +13788,11 @@ bool url_aggregator::set_hostname(const std::string_view input) {
1373213788

1373313789
[[nodiscard]] std::string_view url_aggregator::get_host() const noexcept {
1373413790
ada_log("url_aggregator::get_host");
13791+
// Technically, we should check if there is a hostname, but
13792+
// the code below works even if there isn't.
13793+
// if(!has_hostname()) { return ""; }
1373513794
size_t start = components.host_start;
13736-
if (buffer.size() > components.host_start &&
13795+
if (components.host_end > components.host_start &&
1373713796
buffer[components.host_start] == '@') {
1373813797
start++;
1373913798
}
@@ -13747,9 +13806,12 @@ bool url_aggregator::set_hostname(const std::string_view input) {
1374713806

1374813807
[[nodiscard]] std::string_view url_aggregator::get_hostname() const noexcept {
1374913808
ada_log("url_aggregator::get_hostname");
13809+
// Technically, we should check if there is a hostname, but
13810+
// the code below works even if there isn't.
13811+
// if(!has_hostname()) { return ""; }
1375013812
size_t start = components.host_start;
1375113813
// So host_start is not where the host begins.
13752-
if (buffer.size() > components.host_start &&
13814+
if (components.host_end > components.host_start &&
1375313815
buffer[components.host_start] == '@') {
1375413816
start++;
1375513817
}

deps/ada.h

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* auto-generated on 2023-05-09 17:25:59 -0400. Do not edit! */
1+
/* auto-generated on 2023-05-16 13:48:47 -0400. Do not edit! */
22
/* begin file include/ada.h */
33
/**
44
* @file ada.h
@@ -468,6 +468,17 @@ namespace ada {
468468
if (!(COND)) __builtin_unreachable(); \
469469
} while (0)
470470
#endif
471+
472+
#if defined(__SSE2__) || defined(__x86_64__) || defined(__x86_64) || \
473+
(defined(_M_AMD64) || defined(_M_X64) || \
474+
(defined(_M_IX86_FP) && _M_IX86_FP == 2))
475+
#define ADA_SSE2 1
476+
#endif
477+
478+
#if defined(__aarch64__) || defined(_M_ARM64)
479+
#define ADA_NEON 1
480+
#endif
481+
471482
#endif // ADA_COMMON_DEFS_H
472483
/* end file include/ada/common_defs.h */
473484
#include <stdint.h>
@@ -4320,7 +4331,7 @@ std::string to_unicode(std::string_view input);
43204331
* @attention The has_tabs_or_newline function is a bottleneck and it is simple
43214332
* enough that compilers like GCC can 'autovectorize it'.
43224333
*/
4323-
ada_really_inline constexpr bool has_tabs_or_newline(
4334+
ada_really_inline bool has_tabs_or_newline(
43244335
std::string_view user_input) noexcept;
43254336

43264337
/**
@@ -6473,14 +6484,14 @@ inline std::ostream &operator<<(std::ostream &out,
64736484
#ifndef ADA_ADA_VERSION_H
64746485
#define ADA_ADA_VERSION_H
64756486

6476-
#define ADA_VERSION "2.4.0"
6487+
#define ADA_VERSION "2.4.1"
64776488

64786489
namespace ada {
64796490

64806491
enum {
64816492
ADA_VERSION_MAJOR = 2,
64826493
ADA_VERSION_MINOR = 4,
6483-
ADA_VERSION_REVISION = 0,
6494+
ADA_VERSION_REVISION = 1,
64846495
};
64856496

64866497
} // namespace ada
@@ -6508,11 +6519,11 @@ using result = tl::expected<result_type, ada::errors>;
65086519

65096520
/**
65106521
* The URL parser takes a scalar value string input, with an optional null or
6511-
* base URL base (default null). The parser assumes the input has an UTF-8
6512-
* encoding.
6522+
* base URL base (default null). The parser assumes the input is a valid ASCII
6523+
* or UTF-8 string.
65136524
*
6514-
* @param input the string input to analyze.
6515-
* @param base_url the optional string input to use as a base url.
6525+
* @param input the string input to analyze (must be valid ASCII or UTF-8)
6526+
* @param base_url the optional URL input to use as a base url.
65166527
* @return a parsed URL.
65176528
*/
65186529
template <class result_type = ada::url_aggregator>
@@ -6525,14 +6536,17 @@ extern template ada::result<url_aggregator> parse<url_aggregator>(
65256536
std::string_view input, const url_aggregator* base_url);
65266537

65276538
/**
6539+
* Verifies whether the URL strings can be parsed. The function assumes
6540+
* that the inputs are valid ASCII or UTF-8 strings.
65286541
* @see https://url.spec.whatwg.org/#dom-url-canparse
65296542
* @return If URL can be parsed or not.
65306543
*/
65316544
bool can_parse(std::string_view input,
65326545
const std::string_view* base_input = nullptr);
65336546

65346547
/**
6535-
* Computes a href string from a file path.
6548+
* Computes a href string from a file path. The function assumes
6549+
* that the input is a valid ASCII or UTF-8 string.
65366550
* @return a href string (starts with file:://)
65376551
*/
65386552
std::string href_from_file(std::string_view path);

deps/ada_c.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ typedef struct {
3838

3939
typedef void* ada_url;
4040

41-
// input should be a null terminated C string
41+
// input should be a null terminated C string (ASCII or UTF-8)
4242
// you must call ada_free on the returned pointer
4343
ada_url ada_parse(const char* input, size_t length);
4444
ada_url ada_parse_with_base(const char* input, size_t input_length,

0 commit comments

Comments
 (0)