Skip to content

Commit 43a8009

Browse files
committed
Clean up String::find to remove duplicate code, and speed up comparison with memcmp where possible.
1 parent b15a13e commit 43a8009

File tree

3 files changed

+62
-167
lines changed

3 files changed

+62
-167
lines changed

core/string/ustring.cpp

Lines changed: 39 additions & 162 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,17 @@ static _FORCE_INLINE_ char32_t lower_case(char32_t c) {
6363
return (is_ascii_upper_case(c) ? (c + ('a' - 'A')) : c);
6464
}
6565

66+
// Case-insensitive version of are_spans_equal.
//
// Compares the first p_len elements starting at p_lhs_begin and p_rhs_begin,
// passing each element through _find_lower() before comparing.
// (_find_lower is defined elsewhere in this file; presumably it lower-cases
// a single character -- confirm against its definition.)
//
// T1 and T2 may differ, so the two ranges can use different character types.
//
// Returns false at the first position whose converted elements differ, and
// true otherwise (including when p_len is 0).
//
// No bounds checking is done here: the caller must guarantee that both
// ranges contain at least p_len readable elements.
67+
template <typename T1, typename T2>
68+
static bool strings_equal_lower(const T1 *p_lhs_begin, const T2 *p_rhs_begin, size_t p_len) {
69+
for (size_t i = 0; i < p_len; ++i) {
70+
// Stop at the first mismatch; later elements are not examined.
if (_find_lower(p_lhs_begin[i]) != _find_lower(p_rhs_begin[i])) {
71+
return false;
72+
}
73+
}
74+
return true;
75+
}
76+
6677
Error String::parse_url(String &r_scheme, String &r_host, int &r_port, String &r_path, String &r_fragment) const {
6778
// Splits the URL into scheme, host, port, path, fragment. Strip credentials when present.
6879
String base = *this;
@@ -3043,41 +3054,7 @@ int String::find(const char *p_str, int p_from) const {
30433054
return find_char(*p_str, p_from); // Optimize with single-char find.
30443055
}
30453056

3046-
const char32_t *src = get_data();
3047-
3048-
if (str_len == 1) {
3049-
const char32_t needle = p_str[0];
3050-
3051-
for (int i = p_from; i < len; i++) {
3052-
if (src[i] == needle) {
3053-
return i;
3054-
}
3055-
}
3056-
3057-
} else {
3058-
for (int i = p_from; i <= (len - str_len); i++) {
3059-
bool found = true;
3060-
for (int j = 0; j < str_len; j++) {
3061-
int read_pos = i + j;
3062-
3063-
if (read_pos >= len) {
3064-
ERR_PRINT("read_pos>=length()");
3065-
return -1;
3066-
}
3067-
3068-
if (src[read_pos] != (char32_t)p_str[j]) {
3069-
found = false;
3070-
break;
3071-
}
3072-
}
3073-
3074-
if (found) {
3075-
return i;
3076-
}
3077-
}
3078-
}
3079-
3080-
return -1;
3057+
return span().find_sequence(Span((const unsigned char *)p_str, str_len), p_from);
30813058
}
30823059

30833060
int String::find_char(char32_t p_char, int p_from) const {
@@ -3110,36 +3087,21 @@ int String::findmk(const Vector<String> &p_keys, int p_from, int *r_key) const {
31103087
const char32_t *src = get_data();
31113088

31123089
for (int i = p_from; i < len; i++) {
3113-
bool found = true;
31143090
for (int k = 0; k < key_count; k++) {
3115-
found = true;
3116-
if (r_key) {
3117-
*r_key = k;
3118-
}
3119-
const char32_t *cmp = keys[k].get_data();
3120-
int l = keys[k].length();
3121-
3122-
for (int j = 0; j < l; j++) {
3123-
int read_pos = i + j;
3091+
const int str_len = keys[k].length();
31243092

3125-
if (read_pos >= len) {
3126-
found = false;
3127-
break;
3128-
}
3093+
if (i + str_len > len) {
3094+
continue; // Can't find this key here.
3095+
}
31293096

3130-
if (src[read_pos] != cmp[j]) {
3131-
found = false;
3132-
break;
3097+
const char32_t *str = keys[k].get_data();
3098+
if (are_spans_equal(src + i, str, str_len)) {
3099+
if (r_key) {
3100+
*r_key = k;
31333101
}
3134-
}
3135-
if (found) {
3136-
break;
3102+
return i;
31373103
}
31383104
}
3139-
3140-
if (found) {
3141-
return i;
3142-
}
31433105
}
31443106

31453107
return -1;
@@ -3156,28 +3118,11 @@ int String::findn(const String &p_str, int p_from) const {
31563118
return -1; // Still out of bounds
31573119
}
31583120

3159-
const char32_t *srcd = get_data();
3121+
const char32_t *src = get_data();
3122+
const char32_t *str = p_str.get_data();
31603123

31613124
for (int i = p_from; i <= (len - str_len); i++) {
3162-
bool found = true;
3163-
for (int j = 0; j < str_len; j++) {
3164-
int read_pos = i + j;
3165-
3166-
if (read_pos >= len) {
3167-
ERR_PRINT("read_pos>=length()");
3168-
return -1;
3169-
}
3170-
3171-
char32_t src = _find_lower(srcd[read_pos]);
3172-
char32_t dst = _find_lower(p_str[j]);
3173-
3174-
if (src != dst) {
3175-
found = false;
3176-
break;
3177-
}
3178-
}
3179-
3180-
if (found) {
3125+
if (strings_equal_lower(src + i, str, str_len)) {
31813126
return i;
31823127
}
31833128
}
@@ -3196,28 +3141,10 @@ int String::findn(const char *p_str, int p_from) const {
31963141
return -1; // Still out of bounds
31973142
}
31983143

3199-
const char32_t *srcd = get_data();
3144+
const char32_t *src = get_data();
32003145

32013146
for (int i = p_from; i <= (len - str_len); i++) {
3202-
bool found = true;
3203-
for (int j = 0; j < str_len; j++) {
3204-
int read_pos = i + j;
3205-
3206-
if (read_pos >= len) {
3207-
ERR_PRINT("read_pos>=length()");
3208-
return -1;
3209-
}
3210-
3211-
char32_t src = _find_lower(srcd[read_pos]);
3212-
char32_t dst = _find_lower(p_str[j]);
3213-
3214-
if (src != dst) {
3215-
found = false;
3216-
break;
3217-
}
3218-
}
3219-
3220-
if (found) {
3147+
if (strings_equal_lower(src + i, p_str, str_len)) {
32213148
return i;
32223149
}
32233150
}
@@ -3255,31 +3182,12 @@ int String::rfind(const char *p_str, int p_from) const {
32553182
return -1; // Still out of bounds
32563183
}
32573184

3258-
const char32_t *source = get_data();
3259-
3260-
for (int i = p_from; i >= 0; i--) {
3261-
bool found = true;
3262-
for (int j = 0; j < str_len; j++) {
3263-
int read_pos = i + j;
3264-
3265-
if (read_pos >= length()) {
3266-
ERR_PRINT("read_pos>=length()");
3267-
return -1;
3268-
}
3269-
3270-
const char32_t key_needle = p_str[j];
3271-
if (source[read_pos] != key_needle) {
3272-
found = false;
3273-
break;
3274-
}
3275-
}
3276-
3277-
if (found) {
3278-
return i;
3279-
}
3185+
if (str_len == 1) {
3186+
// Optimize with single-char implementation.
3187+
return span().rfind(p_str[0], p_from);
32803188
}
32813189

3282-
return -1;
3190+
return span().rfind_sequence(Span((const unsigned char *)p_str, str_len), p_from);
32833191
}
32843192

32853193
int String::rfind_char(char32_t p_char, int p_from) const {
@@ -3303,28 +3211,16 @@ int String::rfindn(const String &p_str, int p_from) const {
33033211
return -1; // Still out of bounds
33043212
}
33053213

3214+
if (str_len == 1) {
3215+
// Optimize with single-char implementation.
3216+
return span().rfind(p_str[0], p_from);
3217+
}
3218+
33063219
const char32_t *src = get_data();
3220+
const char32_t *str = p_str.get_data();
33073221

33083222
for (int i = p_from; i >= 0; i--) {
3309-
bool found = true;
3310-
for (int j = 0; j < str_len; j++) {
3311-
int read_pos = i + j;
3312-
3313-
if (read_pos >= len) {
3314-
ERR_PRINT("read_pos>=length()");
3315-
return -1;
3316-
}
3317-
3318-
char32_t srcc = _find_lower(src[read_pos]);
3319-
char32_t dstc = _find_lower(p_str[j]);
3320-
3321-
if (srcc != dstc) {
3322-
found = false;
3323-
break;
3324-
}
3325-
}
3326-
3327-
if (found) {
3223+
if (strings_equal_lower(src + i, str, str_len)) {
33283224
return i;
33293225
}
33303226
}
@@ -3343,29 +3239,10 @@ int String::rfindn(const char *p_str, int p_from) const {
33433239
return -1; // Still out of bounds
33443240
}
33453241

3346-
const char32_t *source = get_data();
3242+
const char32_t *src = get_data();
33473243

33483244
for (int i = p_from; i >= 0; i--) {
3349-
bool found = true;
3350-
for (int j = 0; j < str_len; j++) {
3351-
int read_pos = i + j;
3352-
3353-
if (read_pos >= len) {
3354-
ERR_PRINT("read_pos>=length()");
3355-
return -1;
3356-
}
3357-
3358-
const char32_t key_needle = p_str[j];
3359-
int srcc = _find_lower(source[read_pos]);
3360-
int keyc = _find_lower(key_needle);
3361-
3362-
if (srcc != keyc) {
3363-
found = false;
3364-
break;
3365-
}
3366-
}
3367-
3368-
if (found) {
3245+
if (strings_equal_lower(src + i, p_str, str_len)) {
33693246
return i;
33703247
}
33713248
}

core/templates/span.h

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -116,11 +116,14 @@ class Span {
116116

117117
// Algorithms.
118118
constexpr int64_t find(const T &p_val, uint64_t p_from = 0) const;
119-
constexpr int64_t find_sequence(const Span<T> &p_span, uint64_t p_from = 0) const;
119+
template <typename T1 = T>
120+
constexpr int64_t find_sequence(const Span<T1> &p_span, uint64_t p_from = 0) const;
120121
constexpr int64_t rfind(const T &p_val, uint64_t p_from) const;
121122
// Convenience overload: forwards to rfind(p_val, p_from) with p_from set to
// size() - 1, i.e. the index of the last element.
// NOTE(review): size() - 1 is evaluated before the call; confirm the intended
// behavior for an empty span.
_FORCE_INLINE_ constexpr int64_t rfind(const T &p_val) const { return rfind(p_val, size() - 1); }
122-
constexpr int64_t rfind_sequence(const Span<T> &p_span, uint64_t p_from) const;
123-
_FORCE_INLINE_ constexpr int64_t rfind_sequence(const Span<T> &p_span) const { return rfind_sequence(p_span, size() - p_span.size()); }
123+
template <typename T1 = T>
124+
constexpr int64_t rfind_sequence(const Span<T1> &p_span, uint64_t p_from) const;
125+
// Convenience overload: forwards to rfind_sequence(p_span, p_from) with p_from
// set to size() - p_span.size(), the highest start index at which p_span can
// still fit inside this span.
// T1 defaults to T but may differ, so the searched-for span can use another
// element type.
// NOTE(review): confirm the intended behavior when p_span is longer than this
// span, since the subtraction is evaluated before the call.
template <typename T1 = T>
126+
_FORCE_INLINE_ constexpr int64_t rfind_sequence(const Span<T1> &p_span) const { return rfind_sequence(p_span, size() - p_span.size()); }
124127
constexpr uint64_t count(const T &p_val) const;
125128
/// Find the index of the given value using binary search.
126129
/// Note: Assumes that elements in the span are sorted. Otherwise, use find() instead.
@@ -142,7 +145,8 @@ constexpr int64_t Span<T>::find(const T &p_val, uint64_t p_from) const {
142145
}
143146

144147
template <typename T>
145-
constexpr int64_t Span<T>::find_sequence(const Span<T> &p_span, uint64_t p_from) const {
148+
template <typename T1>
149+
constexpr int64_t Span<T>::find_sequence(const Span<T1> &p_span, uint64_t p_from) const {
146150
for (uint64_t i = p_from; i <= size() - p_span.size(); i++) {
147151
if (are_spans_equal(ptr() + i, p_span.ptr(), p_span.size())) {
148152
return i;
@@ -154,6 +158,7 @@ constexpr int64_t Span<T>::find_sequence(const Span<T> &p_span, uint64_t p_from)
154158

155159
template <typename T>
156160
constexpr int64_t Span<T>::rfind(const T &p_val, uint64_t p_from) const {
161+
DEV_ASSERT(p_from < size());
157162
for (int64_t i = p_from; i >= 0; i--) {
158163
if (ptr()[i] == p_val) {
159164
return i;
@@ -163,7 +168,9 @@ constexpr int64_t Span<T>::rfind(const T &p_val, uint64_t p_from) const {
163168
}
164169

165170
template <typename T>
166-
constexpr int64_t Span<T>::rfind_sequence(const Span<T> &p_span, uint64_t p_from) const {
171+
template <typename T1>
172+
constexpr int64_t Span<T>::rfind_sequence(const Span<T1> &p_span, uint64_t p_from) const {
173+
DEV_ASSERT(p_from + p_span.size() <= size());
167174
for (int64_t i = p_from; i >= 0; i--) {
168175
if (are_spans_equal(ptr() + i, p_span.ptr(), p_span.size())) {
169176
return i;

tests/core/string/test_string.h

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -396,6 +396,8 @@ TEST_CASE("[String] Find") {
396396
MULTICHECK_STRING_EQ(s, find, "tty", 3);
397397
MULTICHECK_STRING_EQ(s, find, "Revenge of the Monster Truck", -1);
398398
MULTICHECK_STRING_INT_EQ(s, find, "Wo", 9, 13);
399+
MULTICHECK_STRING_INT_EQ(s, find, "Wo", 1000, -1);
400+
MULTICHECK_STRING_INT_EQ(s, find, "Wo", -1, -1);
399401
MULTICHECK_STRING_EQ(s, find, "", -1);
400402
MULTICHECK_STRING_EQ(s, find, "Pretty Woman Woman", 0);
401403
MULTICHECK_STRING_EQ(s, find, "WOMAN", -1);
@@ -407,6 +409,8 @@ TEST_CASE("[String] Find") {
407409
MULTICHECK_STRING_EQ(s, rfind, "man", 15);
408410
MULTICHECK_STRING_EQ(s, rfind, "WOMAN", -1);
409411
MULTICHECK_STRING_INT_EQ(s, rfind, "", 15, -1);
412+
MULTICHECK_STRING_INT_EQ(s, rfind, "Wo", 1000, -1);
413+
MULTICHECK_STRING_INT_EQ(s, rfind, "Wo", -1, 13);
410414
}
411415

412416
TEST_CASE("[String] Find character") {
@@ -426,6 +430,8 @@ TEST_CASE("[String] Find case insensitive") {
426430
String s = "Pretty Whale Whale";
427431
MULTICHECK_STRING_EQ(s, findn, "WHA", 7);
428432
MULTICHECK_STRING_INT_EQ(s, findn, "WHA", 9, 13);
433+
MULTICHECK_STRING_INT_EQ(s, findn, "WHA", 1000, -1);
434+
MULTICHECK_STRING_INT_EQ(s, findn, "WHA", -1, -1);
429435
MULTICHECK_STRING_EQ(s, findn, "Revenge of the Monster SawFish", -1);
430436
MULTICHECK_STRING_EQ(s, findn, "", -1);
431437
MULTICHECK_STRING_EQ(s, findn, "wha", 7);
@@ -437,6 +443,8 @@ TEST_CASE("[String] Find case insensitive") {
437443
MULTICHECK_STRING_EQ(s, rfindn, "wha", 13);
438444
MULTICHECK_STRING_EQ(s, rfindn, "Wha", 13);
439445
MULTICHECK_STRING_INT_EQ(s, rfindn, "", 13, -1);
446+
MULTICHECK_STRING_INT_EQ(s, rfindn, "WHA", 1000, -1);
447+
MULTICHECK_STRING_INT_EQ(s, rfindn, "WHA", -1, 13);
440448
}
441449

442450
TEST_CASE("[String] Find MK") {
@@ -453,6 +461,9 @@ TEST_CASE("[String] Find MK") {
453461

454462
CHECK(s.findmk(keys, 5, &key) == 9);
455463
CHECK(key == 2);
464+
465+
CHECK(s.findmk(keys, -1, &key) == -1);
466+
CHECK(s.findmk(keys, 1000, &key) == -1);
456467
}
457468

458469
TEST_CASE("[String] Find and replace") {

0 commit comments

Comments
 (0)