Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 81 additions & 79 deletions components/core/src/clp/string_utils/string_utils.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include "string_utils/string_utils.hpp"

#include <algorithm>
#include <charconv>
#include <cctype>
#include <cstring>
#include <string>
#include <string_view>
Expand All @@ -14,33 +14,35 @@ namespace {
* Helper for ``wildcard_match_unsafe_case_sensitive`` to advance the pointer in
* tame to the next character which matches wild. This method should be inlined
* for performance.
*
* @param tame_current
* @param tame_bookmark
* @param tame_end
* @param wild_current
* @param wild_bookmark
* @return true on success, false if wild cannot match tame
*/
inline bool advance_tame_to_next_match(
[[nodiscard]] inline auto advance_tame_to_next_match(
char const*& tame_current,
char const*& tame_bookmark,
char const* tame_end,
char const*& wild_current
);
) -> bool;

inline bool advance_tame_to_next_match(
inline auto advance_tame_to_next_match(
char const*& tame_current,
char const*& tame_bookmark,
char const* tame_end,
char const*& wild_current
) {
) -> bool {
Comment on lines +32 to +37
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

[[nodiscard]] missing from the definition of advance_tame_to_next_match.

The forward declaration (line 25) correctly carries [[nodiscard]], but the definition at line 32 omits it. The PR explicitly adds [[nodiscard]] to both declarations and definitions for all other updated functions; this definition is the lone exception.

♻️ Proposed fix
-inline auto advance_tame_to_next_match(
+[[nodiscard]] inline auto advance_tame_to_next_match(
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
inline auto advance_tame_to_next_match(
char const*& tame_current,
char const*& tame_bookmark,
char const* tame_end,
char const*& wild_current
) {
) -> bool {
[[nodiscard]] inline auto advance_tame_to_next_match(
char const*& tame_current,
char const*& tame_bookmark,
char const* tame_end,
char const*& wild_current
) -> bool {
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@components/core/src/clp/string_utils/string_utils.cpp` around lines 32 - 37,
The definition of advance_tame_to_next_match is missing the [[nodiscard]]
attribute present on its forward declaration; update the function definition
signature for advance_tame_to_next_match(char const*& tame_current, char const*&
tame_bookmark, char const* tame_end, char const*& wild_current) to include
[[nodiscard]] so it matches the declaration and the rest of the updated
functions.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No we only add [[nodiscard]] to declarations, not definitions.

auto w = *wild_current;
if ('?' != w) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if ('?' != w) {
if (cSingleCharWildcard != w) {

let's make use of the constants inside constants.hpp

// No need to check for '*' since the caller ensures wild doesn't
// contain consecutive '*'

// Handle escaped characters
if ('\\' == w) {
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
++wild_current;
// This is safe without a bounds check since this the caller ensures
// there are no dangling escape characters
Expand All @@ -58,6 +60,7 @@ inline bool advance_tame_to_next_match(
if (t == w) {
break;
}
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
++tame_current;
}
}
Expand All @@ -69,16 +72,17 @@ inline bool advance_tame_to_next_match(
} // namespace

namespace clp::string_utils {
size_t find_first_of(
auto find_first_of(
string const& haystack,
char const* needles,
size_t search_start_pos,
size_t& needle_ix
) {
size_t haystack_length = haystack.length();
size_t needles_length = strlen(needles);
) -> size_t {
size_t const haystack_length{haystack.length()};
size_t const needles_length{strlen(needles)};
for (size_t i = search_start_pos; i < haystack_length; ++i) {
for (needle_ix = 0; needle_ix < needles_length; ++needle_ix) {
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
if (haystack[i] == needles[needle_ix]) {
return i;
}
Expand All @@ -88,29 +92,29 @@ size_t find_first_of(
return string::npos;
}

string replace_characters(
auto replace_characters(
char const* characters_to_replace,
char const* replacement_characters,
string const& value,
bool escape
) {
) -> string {
string new_value;
size_t search_start_pos = 0;
size_t search_start_pos{0};
while (true) {
size_t replace_char_ix;
size_t char_to_replace_pos
size_t replace_char_ix{0};
size_t const char_to_replace_pos
= find_first_of(value, characters_to_replace, search_start_pos, replace_char_ix);
Comment on lines +105 to 106
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
size_t const char_to_replace_pos
= find_first_of(value, characters_to_replace, search_start_pos, replace_char_ix);
auto const char_to_replace_pos
= find_first_of(value, characters_to_replace, search_start_pos, replace_char_ix);

if (string::npos == char_to_replace_pos) {
new_value.append(value, search_start_pos, string::npos);
new_value.append(value, search_start_pos);
break;
} else {
new_value.append(value, search_start_pos, char_to_replace_pos - search_start_pos);
if (escape) {
new_value += "\\";
}
new_value += replacement_characters[replace_char_ix];
search_start_pos = char_to_replace_pos + 1;
}
new_value.append(value, search_start_pos, char_to_replace_pos - search_start_pos);
if (escape) {
new_value += "\\";
}
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
new_value += replacement_characters[replace_char_ix];
search_start_pos = char_to_replace_pos + 1;
}
return new_value;
}
Expand Down Expand Up @@ -143,22 +147,16 @@ void to_lower(string& str) {
});
}

bool is_wildcard(char c) {
static constexpr char cWildcards[] = "?*";
for (size_t i = 0; i < strlen(cWildcards); ++i) {
if (cWildcards[i] == c) {
return true;
}
}
return false;
auto is_wildcard(char c) -> bool {
return '?' == c || '*' == c;
}

string clean_up_wildcard_search_string(string_view str) {
auto clean_up_wildcard_search_string(string_view str) -> string {
string cleaned_str;

bool is_escaped = false;
auto str_end = str.cend();
for (auto current = str.cbegin(); current != str_end;) {
bool is_escaped{false};
auto const* const str_end = str.cend();
for (auto const* current = str.cbegin(); current != str_end;) {
auto c = *current;
if (is_escaped) {
is_escaped = false;
Expand All @@ -169,20 +167,25 @@ string clean_up_wildcard_search_string(string_view str) {
cleaned_str += '\\';
}
cleaned_str += c;
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
++current;
} else if ('*' == c) {
cleaned_str += c;

// Skip over all '*' to find the next non-'*'
do {
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
++current;
while (current != str_end && '*' == *current) {
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
++current;
} while (current != str_end && '*' == *current);
}
} else {
if ('\\' == c) {
is_escaped = true;
} else {
cleaned_str += c;
}
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
++current;
}
}
Expand All @@ -206,18 +209,17 @@ auto unescape_string(std::string_view str) -> std::string {
return unescaped_str;
}

bool wildcard_match_unsafe(string_view tame, string_view wild, bool case_sensitive_match) {
auto wildcard_match_unsafe(string_view tame, string_view wild, bool case_sensitive_match) -> bool {
if (case_sensitive_match) {
return wildcard_match_unsafe_case_sensitive(tame, wild);
} else {
// We convert to lowercase (rather than uppercase) anticipating that
// callers use lowercase more frequently, so little will need to change.
string lowercase_tame(tame);
to_lower(lowercase_tame);
string lowercase_wild(wild);
to_lower(lowercase_wild);
return wildcard_match_unsafe_case_sensitive(lowercase_tame, lowercase_wild);
}
// We convert to lowercase (rather than uppercase) anticipating that
// callers use lowercase more frequently, so little will need to change.
Comment on lines +216 to +217
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// We convert to lowercase (rather than uppercase) anticipating that
// callers use lowercase more frequently, so little will need to change.
// We convert to lowercase (rather than uppercase) anticipating that callers
// use lowercase more frequently, so little will need to change.

Reflow comments for the 80 character line limit used in this file.

string lowercase_tame{tame};
to_lower(lowercase_tame);
string lowercase_wild{wild};
to_lower(lowercase_wild);
return wildcard_match_unsafe_case_sensitive(lowercase_tame, lowercase_wild);
}

/**
Expand All @@ -233,31 +235,32 @@ bool wildcard_match_unsafe(string_view tame, string_view wild, bool case_sensiti
* 3. checks if the two match. If not, the search repeats with the next group in
* tame.
*/
bool wildcard_match_unsafe_case_sensitive(string_view tame, string_view wild) {
auto wildcard_match_unsafe_case_sensitive(string_view tame, string_view wild) -> bool {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

// NOLINTBEGIN(readability-function-cognitive-complexity)
.... function
// NOLINTEND(readability-function-cognitive-complexity)

auto const tame_length = tame.length();
auto const wild_length = wild.length();
char const* tame_current = tame.data();
char const* wild_current = wild.data();
char const* tame_bookmark = nullptr;
char const* wild_bookmark = nullptr;
char const* tame_current{tame.data()};
char const* wild_current{wild.data()};
char const* tame_bookmark{nullptr};
char const* wild_bookmark{nullptr};
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
char const* tame_end = tame_current + tame_length;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
char const* tame_end = tame_current + tame_length;
char const* tame_end{tame_current + tame_length};

// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
char const* wild_end = wild_current + wild_length;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
char const* wild_end = wild_current + wild_length;
char const* wild_end{wild_current + wild_length};


// Handle wild or tame being empty
if (0 == wild_length) {
return 0 == tame_length;
} else {
if (0 == tame_length) {
return "*" == wild;
}
}
if (0 == tame_length) {
return "*" == wild;
}

char w;
char t;
bool is_escaped = false;
char w{'\0'};
char t{'\0'};
while (true) {
w = *wild_current;
if ('*' == w) {
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
++wild_current;
if (wild_end == wild_current) {
// Trailing '*' means everything remaining in tame will match
Expand All @@ -273,8 +276,9 @@ bool wildcard_match_unsafe_case_sensitive(string_view tame, string_view wild) {
}
} else {
// Handle escaped characters
if ('\\' == w) {
is_escaped = true;
bool is_escaped = '\\' == w;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
bool is_escaped = '\\' == w;
auto const is_escaped = ('\\' == w);

or

Suggested change
bool is_escaped = '\\' == w;
bool const is_escaped{'\\' == w};

if (is_escaped) {
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
++wild_current;
// This is safe without a bounds check since this the caller
// ensures there are no dangling escape characters
Expand All @@ -283,13 +287,14 @@ bool wildcard_match_unsafe_case_sensitive(string_view tame, string_view wild) {

// Handle a mismatch
t = *tame_current;
if (!((false == is_escaped && '?' == w) || t == w)) {
if (false == ((false == is_escaped && '?' == w) || t == w)) {
if (nullptr == wild_bookmark) {
// No bookmark to return to
return false;
}

wild_current = wild_bookmark;
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
tame_current = tame_bookmark + 1;
if (false
== advance_tame_to_next_match(
Expand All @@ -304,32 +309,29 @@ bool wildcard_match_unsafe_case_sensitive(string_view tame, string_view wild) {
}
}

// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
++tame_current;
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
++wild_current;

// Handle reaching the end of tame or wild
if (tame_end == tame_current) {
return (wild_end == wild_current
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
|| ('*' == *wild_current && (wild_current + 1) == wild_end));
} else {
if (wild_end == wild_current) {
if (nullptr == wild_bookmark) {
// No bookmark to return to
return false;
} else {
wild_current = wild_bookmark;
tame_current = tame_bookmark + 1;
if (false
== advance_tame_to_next_match(
tame_current,
tame_bookmark,
tame_end,
wild_current
))
{
return false;
}
}
}
if (wild_end == wild_current) {
if (nullptr == wild_bookmark) {
// No bookmark to return to
return false;
}
wild_current = wild_bookmark;
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
tame_current = tame_bookmark + 1;
if (false
== advance_tame_to_next_match(tame_current, tame_bookmark, tame_end, wild_current))
{
return false;
}
}
}
Expand Down
Loading
Loading