@@ -190,6 +190,11 @@ namespace tiny_utf8
190190 char last;
191191 } bytes;
192192 };
193+
//! Length of a zero-terminated sequence of arbitrary character type (terminator not counted)
template<typename T>
inline std::size_t strlen( const T* str ){
    // Walk to the terminating zero and measure the distance travelled
    const T* cursor = str;
    while( *cursor )
        ++cursor;
    return static_cast<std::size_t>( cursor - str );
}
//! For plain 'char' sequences, defer to the C library implementation
template<>
inline std::size_t strlen<char>( const char* str ){
    return std::strlen( str );
}
193198 }
194199
195200
@@ -301,7 +306,7 @@ namespace tiny_utf8
301306 protected:
302307
303308 difference_type t_raw_index;
304- Container* t_instance;
309+ Container* t_instance = nullptr ;
305310
306311 protected:
307312
@@ -1590,10 +1595,10 @@ namespace tiny_utf8
15901595 */
15911596 inline raw_reference back () noexcept { return { back_index () , this }; }
15921597 inline value_type back () const noexcept {
1593- size_type sz = size ();
1598+ size_type my_size = size ();
15941599 const data_type* buffer = get_buffer ();
1595- width_type bytes = get_num_bytes_of_utf8_char_before ( buffer , sz );
1596- return decode_utf8 ( buffer + sz - bytes , bytes );
1600+ width_type bytes = get_num_bytes_of_utf8_char_before ( buffer , my_size );
1601+ return decode_utf8 ( buffer + my_size - bytes , bytes );
15971602 }
15981603
15991604
@@ -2130,6 +2135,177 @@ namespace tiny_utf8
21302135 size_type raw_find_last_not_of ( const value_type* str , size_type start_byte = basic_utf8_string::npos ) const noexcept ;
21312136
21322137
2138+ /* *
2139+ * Check, whether this string ends with the supplied character sequence
2140+ *
2141+ * @param str The string to compare the end of this string with
2142+ * @return true, if this string ends with the sequence 'str', false otherwise.
2143+ */
2144+ inline bool starts_with ( const basic_utf8_string& str ) const noexcept {
2145+ size_type my_size = size (), str_size = str.size ();
2146+ return my_size >= str_size && std::memcmp ( data () , str.data () , str_size ) == 0 ;
2147+ }
2148+ /* *
2149+ * Check, whether this string ends with the supplied character sequence
2150+ *
2151+ * @param str The string to compare the end of this string with
2152+ * @return true, if this string ends with the sequence 'str', false otherwise.
2153+ */
2154+ inline bool starts_with ( const std::string& str ) const noexcept {
2155+ size_type my_size = size (), str_size = str.size ();
2156+ return my_size >= str_size && std::memcmp ( data () , str.data () , str_size ) == 0 ;
2157+ }
2158+ /* *
2159+ * Check, whether this string ends with the supplied codepoint
2160+ *
2161+ * @param str The codepoint to compare the end of this string with
2162+ * @return true, if this string ends with the codepoint 'cp', false otherwise.
2163+ */
2164+ inline bool starts_with ( value_type cp ) const noexcept {
2165+ return !empty () && front () == cp;
2166+ }
2167+ /* *
2168+ * Check, whether this string ends with the supplied UTF-8 sequence.
2169+ *
2170+ * @param str Null-terminated string literal, interpreted as UTF-8. The pointer is expected to be valid
2171+ * @return true, if this string ends with the sequence 'str', false otherwise.
2172+ */
2173+ template <typename T>
2174+ bool starts_with ( T str , enable_if_ptr<T, data_type>* = {} ) const noexcept {
2175+ size_type my_size = size (), str_size = std::strlen ( str );
2176+ if ( my_size < str_size )
2177+ return false ;
2178+ for ( const data_type* my_data = data () ; *str && *str == *my_data ; ++str, ++my_data );
2179+ return !*str;
2180+ }
2181+ /* *
2182+ * Check, whether this string ends with the supplied UTF-8 sequence.
2183+ *
2184+ * @param str Pointer to a string literal with possibly embedded zeros, interpreted as UTF-8. The pointer is expected to be valid
2185+ * @return true, if this string ends with the sequence 'str', false otherwise.
2186+ */
2187+ template <size_type LITLEN>
2188+ bool starts_with ( const data_type (&str)[LITLEN] ) const noexcept {
2189+ size_type my_size = size (), str_size = str[LITLEN-1 ] ? LITLEN : LITLEN-1 ;
2190+ return my_size >= str_size && std::memcmp ( data () , str , str_size ) == 0 ;
2191+ }
2192+ /* *
2193+ * Check, whether this string ends with the supplied codepoint sequence.
2194+ *
2195+ * @param str Pointer to a null-terminated string literal, interpreted as UTF-32. The pointer is expected to be valid
2196+ * @return true, if this string ends with the sequence 'str', false otherwise.
2197+ */
2198+ template <typename T>
2199+ bool starts_with ( T str , enable_if_ptr<T, value_type>* = {} ) const noexcept {
2200+ for ( const_iterator it = cbegin (), end = cend () ; *str && it != end && *str == *it ; ++str, ++it );
2201+ return !*str;
2202+ }
2203+ /* *
2204+ * Check, whether this string ends with the supplied codepoint sequence.
2205+ *
2206+ * @param str Pointer to a string literal with possibly embedded zeros, interpreted as UTF-32. The pointer is expected to be valid
2207+ * @return true, if this string ends with the sequence 'str', false otherwise.
2208+ */
2209+ template <size_type LITLEN>
2210+ bool starts_with ( const value_type (&str)[LITLEN] ) const noexcept {
2211+ size_type str_len = str[LITLEN-1 ] ? LITLEN : LITLEN-1 ;
2212+ const_iterator it = cbegin (), end = cend ();
2213+ while ( it != end && str_len ){
2214+ if ( *it != *str )
2215+ return false ;
2216+ ++it, ++str, --str_len;
2217+ }
2218+ return !str_len;
2219+ }
2220+
2221+
2222+ /* *
2223+ * Check, whether this string ends with the supplied character sequence
2224+ *
2225+ * @param str The string to compare the end of this string with
2226+ * @return true, if this string ends with the sequence 'str', false otherwise.
2227+ */
2228+ inline bool ends_with ( const basic_utf8_string& str ) const noexcept {
2229+ size_type my_size = size (), str_size = str.size ();
2230+ return my_size >= str_size && std::memcmp ( data () + my_size - str_size , str.data () , str_size ) == 0 ;
2231+ }
2232+ /* *
2233+ * Check, whether this string ends with the supplied character sequence
2234+ *
2235+ * @param str The string to compare the end of this string with
2236+ * @return true, if this string ends with the sequence 'str', false otherwise.
2237+ */
2238+ inline bool ends_with ( const std::string& str ) const noexcept {
2239+ size_type my_size = size (), str_size = str.size ();
2240+ return my_size >= str_size && std::memcmp ( data () + my_size - str_size , str.data () , str_size ) == 0 ;
2241+ }
2242+ /* *
2243+ * Check, whether this string ends with the supplied codepoint
2244+ *
2245+ * @param str The codepoint to compare the end of this string with
2246+ * @return true, if this string ends with the codepoint 'cp', false otherwise.
2247+ */
2248+ inline bool ends_with ( value_type cp ) const noexcept {
2249+ return !empty () && back () == cp;
2250+ }
2251+ /* *
2252+ * Check, whether this string ends with the supplied UTF-8 sequence.
2253+ *
2254+ * @param str Null-terminated string literal, interpreted as UTF-8. The pointer is expected to be valid
2255+ * @return true, if this string ends with the sequence 'str', false otherwise.
2256+ */
2257+ template <typename T>
2258+ bool ends_with ( T str , enable_if_ptr<T, data_type>* = {} ) const noexcept {
2259+ size_type my_size = size (), str_size = std::strlen (str);
2260+ return my_size >= str_size && std::memcmp ( data () + my_size - str_size , str , str_size ) == 0 ;
2261+ }
2262+ /* *
2263+ * Check, whether this string ends with the supplied UTF-8 sequence.
2264+ *
2265+ * @param str Pointer to a string literal with possibly embedded zeros, interpreted as UTF-8. The pointer is expected to be valid
2266+ * @return true, if this string ends with the sequence 'str', false otherwise.
2267+ */
2268+ template <size_type LITLEN>
2269+ bool ends_with ( const data_type (&str)[LITLEN] ) const noexcept {
2270+ size_type my_size = size (), str_size = str[LITLEN-1 ] ? LITLEN : LITLEN-1 ;
2271+ return my_size >= str_size && std::memcmp ( data () + my_size - str_size , str , str_size ) == 0 ;
2272+ }
2273+ /* *
2274+ * Check, whether this string ends with the supplied codepoint sequence.
2275+ *
2276+ * @param str Pointer to a null-terminated string literal, interpreted as UTF-32. The pointer is expected to be valid
2277+ * @return true, if this string ends with the sequence 'str', false otherwise.
2278+ */
2279+ template <typename T>
2280+ bool ends_with ( T str , enable_if_ptr<T, value_type>* = {} ) const noexcept {
2281+ size_type str_len = tiny_utf8_detail::strlen ( str );
2282+ const_reverse_iterator it = crbegin (), end = crend ();
2283+ while ( it != end && str_len ){
2284+ if ( *it != str[--str_len] )
2285+ return false ;
2286+ ++it;
2287+ }
2288+ return !str_len;
2289+ }
2290+ /* *
2291+ * Check, whether this string ends with the supplied codepoint sequence.
2292+ *
2293+ * @param str Pointer to a string literal with possibly embedded zeros, interpreted as UTF-32. The pointer is expected to be valid
2294+ * @return true, if this string ends with the sequence 'str', false otherwise.
2295+ */
2296+ template <size_type LITLEN>
2297+ bool ends_with ( const value_type (&str)[LITLEN] ) const noexcept {
2298+ size_type str_len = str[LITLEN-1 ] ? LITLEN : LITLEN-1 ;
2299+ const_reverse_iterator it = crbegin (), end = crend ();
2300+ while ( it != end && str_len ){
2301+ if ( *it != str[--str_len] )
2302+ return false ;
2303+ ++it;
2304+ }
2305+ return !str_len;
2306+ }
2307+
2308+
21332309 /* *
21342310 * Compare this string with the supplied one.
21352311 *
@@ -2141,8 +2317,8 @@ namespace tiny_utf8
21412317 * the compared string, or all compared characters match but the compared string is longer.
21422318 */
21432319 inline int compare ( const basic_utf8_string& str ) const noexcept {
2144- size_type my_size = size (), str_size = str.size ();
2145- int result = std::memcmp ( data () , str.data () , my_size < str_size ? my_size : str_size );
2320+ size_type my_size = size (), str_size = str.size ();
2321+ int result = std::memcmp ( data () , str.data () , my_size < str_size ? my_size : str_size );
21462322 if ( !result && my_size != str_size )
21472323 result = my_size < str_size ? -1 : 1 ;
21482324 return result;
@@ -2158,15 +2334,15 @@ namespace tiny_utf8
21582334 * the compared string, or all compared characters match but the compared string is longer.
21592335 */
21602336 inline int compare ( const std::string& str ) const noexcept {
2161- size_type my_size = size (), str_size = str.size ();
2162- int result = std::memcmp ( data () , str.data () , my_size < str_size ? my_size : str_size );
2337+ size_type my_size = size (), str_size = str.size ();
2338+ int result = std::memcmp ( data () , str.data () , my_size < str_size ? my_size : str_size );
21632339 if ( !result && my_size != str_size )
21642340 result = my_size < str_size ? -1 : 1 ;
21652341 return result;
21662342 }
21672343 /* *
21682344 * Compares this string with the supplied one.
2169- * Thes supplied string literal is considered to end with the trailling '\0'.
2345+ * The supplied string literal is assumed to end at the (possibly trailing) '\0'.
21702346 * This is especially important, if this utf8 string contains embedded zeros.
21712347 *
21722348 * @param str Null-terminated string literal, interpreted as UTF-8. The pointer is expected to be valid
@@ -2209,7 +2385,7 @@ namespace tiny_utf8
22092385 }
22102386 /* *
22112387 * Compares this string with the supplied one.
2212- * Thes supplied string literal is considered to end with the trailling '\0'.
2388+ * The supplied string literal is assumed to end at the (possibly trailing) '\0'.
22132389 * This is especially important, if this utf8 string contains embedded zeros.
22142390 *
22152391 * @param str Pointer to a null-terminated string literal, interpreted as UTF-32. The pointer is expected to be valid
@@ -2242,7 +2418,7 @@ namespace tiny_utf8
22422418 template <size_type LITLEN>
22432419 int compare ( const value_type (&str)[LITLEN] ) const noexcept {
22442420 const_iterator it = cbegin (), end = cend ();
2245- size_type index = 0 , length = str[LITLEN-1 ] ? LITLEN : LITLEN-1 ;
2421+ size_type index = 0 , length = str[LITLEN-1 ] ? LITLEN : LITLEN-1 ;
22462422 while ( it != end && index < length ){
22472423 if ( *it != str[index] )
22482424 return *it < str[index] ? -1 : 1 ;
0 commit comments