99#include < boost/url.hpp>
1010#include < fmt/core.h>
1111#include < spdlog/spdlog.h>
12+ #include < string_utils/string_utils.hpp>
1213
1314#include " archive_constants.hpp"
1415
@@ -188,9 +189,9 @@ bool StringUtils::get_bounds_of_next_var(string const& msg, size_t& begin_pos, s
188189 end_pos = begin_pos;
189190 for (; end_pos < msg_length; ++end_pos) {
190191 char c = msg[end_pos];
191- if (is_decimal_digit (c)) {
192+ if (clp::string_utils:: is_decimal_digit (c)) {
192193 contains_decimal_digit = true ;
193- } else if (is_alphabet (c)) {
194+ } else if (clp::string_utils:: is_alphabet (c)) {
194195 contains_alphabet = true ;
195196 } else if (is_delim (c)) {
196197 break ;
@@ -212,308 +213,6 @@ bool StringUtils::get_bounds_of_next_var(string const& msg, size_t& begin_pos, s
212213 return (msg_length != begin_pos);
213214}
214215
215- size_t StringUtils::find_first_of (
216- string const & haystack,
217- char const * needles,
218- size_t search_start_pos,
219- size_t & needle_ix
220- ) {
221- size_t haystack_length = haystack.length ();
222- size_t needles_length = strlen (needles);
223- for (size_t i = search_start_pos; i < haystack_length; ++i) {
224- for (needle_ix = 0 ; needle_ix < needles_length; ++needle_ix) {
225- if (haystack[i] == needles[needle_ix]) {
226- return i;
227- }
228- }
229- }
230-
231- return string::npos;
232- }
233-
234- string StringUtils::replace_characters (
235- char const * characters_to_escape,
236- char const * replacement_characters,
237- string const & value,
238- bool escape
239- ) {
240- string new_value;
241- size_t search_start_pos = 0 ;
242- while (true ) {
243- size_t replace_char_ix;
244- size_t char_to_replace_pos
245- = find_first_of (value, characters_to_escape, search_start_pos, replace_char_ix);
246- if (string::npos == char_to_replace_pos) {
247- new_value.append (value, search_start_pos, string::npos);
248- break ;
249- } else {
250- new_value.append (value, search_start_pos, char_to_replace_pos - search_start_pos);
251- if (escape) {
252- new_value += " \\ " ;
253- }
254- new_value += replacement_characters[replace_char_ix];
255- search_start_pos = char_to_replace_pos + 1 ;
256- }
257- }
258- return new_value;
259- }
260-
261- void StringUtils::to_lower (string& str) {
262- std::transform (str.cbegin (), str.cend (), str.begin (), [](unsigned char c) {
263- return std::tolower (c);
264- });
265- }
266-
267- bool StringUtils::is_wildcard (char c) {
268- static constexpr char cWildcards[] = " ?*" ;
269- for (size_t i = 0 ; i < strlen (cWildcards); ++i) {
270- if (cWildcards[i] == c) {
271- return true ;
272- }
273- }
274- return false ;
275- }
276-
277- string StringUtils::clean_up_wildcard_search_string (string_view str) {
278- string cleaned_str;
279-
280- bool is_escaped = false ;
281- auto str_end = str.cend ();
282- for (auto current = str.cbegin (); current != str_end;) {
283- auto c = *current;
284- if (is_escaped) {
285- is_escaped = false ;
286-
287- if (is_wildcard (c) || ' \\ ' == c) {
288- // Keep escaping if c is a wildcard character or an escape character
289- cleaned_str += ' \\ ' ;
290- }
291- cleaned_str += c;
292- ++current;
293- } else if (' *' == c) {
294- cleaned_str += c;
295-
296- // Skip over all '*' to find the next non-'*'
297- do {
298- ++current;
299- } while (current != str_end && ' *' == *current);
300- } else {
301- if (' \\ ' == c) {
302- is_escaped = true ;
303- } else {
304- cleaned_str += c;
305- }
306- ++current;
307- }
308- }
309-
310- return cleaned_str;
311- }
312-
313- bool StringUtils::advance_tame_to_next_match (
314- char const *& tame_current,
315- char const *& tame_bookmark,
316- char const * tame_end,
317- char const *& wild_current,
318- char const *& wild_bookmark
319- ) {
320- auto w = *wild_current;
321- if (' ?' != w) {
322- // No need to check for '*' since the caller ensures wild doesn't
323- // contain consecutive '*'
324-
325- // Handle escaped characters
326- if (' \\ ' == w) {
327- ++wild_current;
328- // This is safe without a bounds check since this the caller
329- // ensures there are no dangling escape characters
330- w = *wild_current;
331- }
332-
333- // Advance tame_current until it matches wild_current
334- while (true ) {
335- if (tame_end == tame_current) {
336- // Wild group is longer than last group in tame, so
337- // can't match
338- // e.g. "*abc" doesn't match "zab"
339- return false ;
340- }
341- auto t = *tame_current;
342- if (t == w) {
343- break ;
344- }
345- ++tame_current;
346- }
347- }
348-
349- tame_bookmark = tame_current;
350-
351- return true ;
352- }
353-
354- bool
355- StringUtils::wildcard_match_unsafe (string_view tame, string_view wild, bool case_sensitive_match) {
356- if (case_sensitive_match) {
357- return wildcard_match_unsafe_case_sensitive (tame, wild);
358- } else {
359- // We convert to lowercase (rather than uppercase) anticipating that
360- // callers use lowercase more frequently, so little will need to change.
361- string lowercase_tame (tame);
362- to_lower (lowercase_tame);
363- string lowercase_wild (wild);
364- to_lower (lowercase_wild);
365- return wildcard_match_unsafe_case_sensitive (lowercase_tame, lowercase_wild);
366- }
367- }
368-
369- /* *
370- * The algorithm basically works as follows:
371- * Given a wild string "*abc*def*ghi*", it can be broken into groups of
372- * characters delimited by one or more '*' characters. The goal of the
373- * algorithm is then to determine whether the tame string contains each of
374- * those groups in the same order.
375- *
376- * Thus, the algorithm:
377- * 1. searches for the start of one of these groups in wild,
378- * 2. searches for a group in tame starting with the same character, and then
379- * 3. checks if the two match. If not, the search repeats with the next group in
380- * tame.
381- */
382- bool StringUtils::wildcard_match_unsafe_case_sensitive (string_view tame, string_view wild) {
383- auto const tame_length = tame.length ();
384- auto const wild_length = wild.length ();
385- char const * tame_current = tame.data ();
386- char const * wild_current = wild.data ();
387- char const * tame_bookmark = nullptr ;
388- char const * wild_bookmark = nullptr ;
389- char const * tame_end = tame_current + tame_length;
390- char const * wild_end = wild_current + wild_length;
391-
392- // Handle wild or tame being empty
393- if (0 == wild_length) {
394- return 0 == tame_length;
395- } else {
396- if (0 == tame_length) {
397- return " *" == wild;
398- }
399- }
400-
401- char w;
402- char t;
403- bool is_escaped = false ;
404- while (true ) {
405- w = *wild_current;
406- if (' *' == w) {
407- ++wild_current;
408- if (wild_end == wild_current) {
409- // Trailing '*' means everything remaining in tame will match
410- return true ;
411- }
412-
413- // Set wild and tame bookmarks
414- wild_bookmark = wild_current;
415- if (!advance_tame_to_next_match (
416- tame_current,
417- tame_bookmark,
418- tame_end,
419- wild_current,
420- wild_bookmark
421- ))
422- {
423- return false ;
424- }
425- } else {
426- // Handle escaped characters
427- if (' \\ ' == w) {
428- is_escaped = true ;
429- ++wild_current;
430- // This is safe without a bounds check since this the caller
431- // ensures there are no dangling escape characters
432- w = *wild_current;
433- }
434-
435- // Handle a mismatch
436- t = *tame_current;
437- if (false == ((false == is_escaped && ' ?' == w) || t == w)) {
438- if (nullptr == wild_bookmark) {
439- // No bookmark to return to
440- return false ;
441- }
442-
443- wild_current = wild_bookmark;
444- tame_current = tame_bookmark + 1 ;
445- if (!advance_tame_to_next_match (
446- tame_current,
447- tame_bookmark,
448- tame_end,
449- wild_current,
450- wild_bookmark
451- ))
452- {
453- return false ;
454- }
455- }
456- }
457-
458- ++tame_current;
459- ++wild_current;
460-
461- // Handle reaching the end of tame or wild
462- if (tame_end == tame_current) {
463- return (wild_end == wild_current
464- || (' *' == *wild_current && (wild_current + 1 ) == wild_end));
465- } else {
466- if (wild_end == wild_current) {
467- if (nullptr == wild_bookmark) {
468- // No bookmark to return to
469- return false ;
470- } else {
471- wild_current = wild_bookmark;
472- tame_current = tame_bookmark + 1 ;
473- if (!advance_tame_to_next_match (
474- tame_current,
475- tame_bookmark,
476- tame_end,
477- wild_current,
478- wild_bookmark
479- ))
480- {
481- return false ;
482- }
483- }
484- }
485- }
486- }
487- }
488-
489- bool StringUtils::convert_string_to_int64 (std::string_view raw, int64_t & converted) {
490- auto raw_end = raw.cend ();
491- auto result = std::from_chars (raw.cbegin (), raw_end, converted);
492- if (raw_end != result.ptr ) {
493- return false ;
494- } else {
495- return result.ec == std::errc ();
496- }
497- }
498-
499- bool StringUtils::convert_string_to_double (std::string const & raw, double & converted) {
500- if (raw.empty ()) {
501- // Can't convert an empty string
502- return false ;
503- }
504-
505- char const * c_str = raw.c_str ();
506- char * end_ptr;
507- // Reset errno so we can detect a new error
508- errno = 0 ;
509- double raw_as_double = strtod (c_str, &end_ptr);
510- if (ERANGE == errno || (end_ptr - c_str) < raw.length ()) {
511- return false ;
512- }
513- converted = raw_as_double;
514- return true ;
515- }
516-
517216void StringUtils::escape_json_string (std::string& destination, std::string_view const source) {
518217 // Escaping is implemented using this `append_unescaped_slice` approach to offer a fast path
519218 // when strings are mostly or entirely valid escaped JSON. Benchmarking shows that this offers
0 commit comments