@@ -420,23 +420,38 @@ function clamp(int $val, ?int $min = null, ?int $max = null): int
function truncate(string $string, int $limit, string $break = " ", string $pad = "..."): string
{
    /*
     * Shorten $string so the result is at most $limit characters long, $pad included.
     *
     *  - $string: UTF-8 text to shorten.
     *  - $limit:  maximum length of the result, in characters; must be greater than
     *             the length of $pad (enforced by the assert below).
     *  - $break:  preferred cut point. When it occurs within the first
     *             ($limit - length of $pad) characters, the text is cut just before
     *             its last occurrence there; otherwise the cut happens mid-word.
     *  - $pad:    marker appended whenever something was cut off.
     *
     * Returns $string untouched when it already fits within $limit.
     */
    $e = "UTF-8";
    $padlen = mb_strlen($pad, $e);
    assert($limit > $padlen, "Can't truncate to a length less than the padding length");

    /*
     * Truncate tentatively, and then check if the lengths stayed the same.
     *
     * This approach is faster than calling mb_strlen and checking against the limit, as
     * mb_strlen has O(n) cost which will slow down significantly for long texts. mb_substr
     * also has O(n) cost, but bounded to $limit, which is usually small.
     *
     * strlen has O(1) cost so it's the fastest way to check if anything happened.
     */
    $truncated = mb_substr($string, 0, $limit, $e);
    if (strlen($truncated) === strlen($string)) {
        return $string;
    }

    // The text is known to be too long: cut again, this time leaving room for the pad text.
    $truncated = mb_substr($truncated, 0, $limit - $padlen, $e);

    /*
     * If there is a break point, truncate to that.
     *
     * The byte-oriented strrpos/substr are sufficient here: if $break is a well-formed
     * UTF-8 sequence, cutting at the start of a match always leaves well-formed UTF-8.
     */
    $breakpoint = strrpos($truncated, $break);
    if ($breakpoint !== false) {
        $truncated = substr($truncated, 0, $breakpoint);
    }

    return $truncated . $pad;
}
441456
442457/**
0 commit comments