3939import java .util .stream .Stream ;
4040
4141import org .apache .commons .io .FileSystem .NameLengthStrategy ;
42+ import org .apache .commons .lang3 .JavaVersion ;
4243import org .apache .commons .lang3 .SystemProperties ;
4344import org .apache .commons .lang3 .SystemUtils ;
4445import org .junit .jupiter .api .Test ;
@@ -68,7 +69,7 @@ class FileSystemTest {
6869 /**
6970 * A grapheme cluster that encodes to 69 UTF-8 bytes and 31 UTF-16 code units: 👩🏻🦰👨🏿🦲👧🏽🦱👦🏼🦳
7071 * <p>
71- * This should be treated as a single character for truncation purposes,
72+ * This should be treated as a single character in JDK 20+ for truncation purposes,
7273 * even if it contains parts that have a meaning on their own.
7374 * </p>
7475 * <ul>
@@ -376,6 +377,16 @@ void testToLegalFileNameWindows() {
376377 }
377378
378379 static Stream <Arguments > testNameLengthStrategyTruncate_Succeeds () {
380+ // The grapheme cluster CHAR_UTF8_69B is treated as a single character only in JDK 20+; on JDK 19 and earlier, truncation keeps a leading part of the cluster.
381+ final String woman ;
382+ final String redHeadWoman ;
383+ if (SystemUtils .isJavaVersionAtMost (JavaVersion .JAVA_19 )) {
384+ woman = CHAR_UTF8_69B .substring (0 , 2 ); // 👩
385+ redHeadWoman = CHAR_UTF8_69B .substring (0 , 7 ); // 👩🏻🦰
386+ } else {
387+ woman = "" ;
388+ redHeadWoman = "" ;
389+ }
379390 return Stream .of (
380391 // Truncation by bytes
381392 // -------------------
@@ -394,7 +405,6 @@ static Stream<Arguments> testNameLengthStrategyTruncate_Succeeds() {
394405 Arguments .of (BYTES , 23 , repeat (CHAR_UTF8_2B , 10 ) + ".txt" , repeat (CHAR_UTF8_2B , 9 ) + ".txt" ),
395406 Arguments .of (BYTES , 33 , repeat (CHAR_UTF8_3B , 10 ) + ".txt" , repeat (CHAR_UTF8_3B , 9 ) + ".txt" ),
396407 Arguments .of (BYTES , 43 , repeat (CHAR_UTF8_4B , 10 ) + ".txt" , repeat (CHAR_UTF8_4B , 9 ) + ".txt" ),
397- Arguments .of (BYTES , 75 , repeat (CHAR_UTF8_69B , 2 ) + ".txt" , repeat (CHAR_UTF8_69B , 1 ) + ".txt" ),
398408 // Names without extensions
399409 Arguments .of (BYTES , 1 , CHAR_UTF8_1B , CHAR_UTF8_1B ),
400410 Arguments .of (BYTES , 2 , CHAR_UTF8_2B , CHAR_UTF8_2B ),
@@ -407,8 +417,8 @@ static Stream<Arguments> testNameLengthStrategyTruncate_Succeeds() {
407417 // Grapheme cluster
408418 Arguments .of (BYTES , 69 , CHAR_UTF8_69B , CHAR_UTF8_69B ),
409419 // Will not cut 4 or 15 bytes of the grapheme cluster
410- Arguments .of (BYTES , 69 + 4 , repeat (CHAR_UTF8_69B , 2 ), repeat ( CHAR_UTF8_69B , 1 ) ),
411- Arguments .of (BYTES , 69 + 15 , repeat (CHAR_UTF8_69B , 2 ), repeat ( CHAR_UTF8_69B , 1 ) ),
420+ Arguments .of (BYTES , 69 + 4 , repeat (CHAR_UTF8_69B , 2 ), CHAR_UTF8_69B + woman ),
421+ Arguments .of (BYTES , 69 + 15 , repeat (CHAR_UTF8_69B , 2 ), CHAR_UTF8_69B + redHeadWoman ),
412422 // Truncation by UTF-16 code units
413423 // -------------------------------
414424 // Empty
@@ -420,7 +430,6 @@ static Stream<Arguments> testNameLengthStrategyTruncate_Succeeds() {
420430 Arguments .of (UTF16_CODE_UNITS , 10 , "." + repeat (CHAR_UTF8_2B , 10 ), "." + repeat (CHAR_UTF8_2B , 9 )),
421431 Arguments .of (UTF16_CODE_UNITS , 10 , "." + repeat (CHAR_UTF8_3B , 10 ), "." + repeat (CHAR_UTF8_3B , 9 )),
422432 Arguments .of (UTF16_CODE_UNITS , 20 , "." + repeat (CHAR_UTF8_4B , 10 ), "." + repeat (CHAR_UTF8_4B , 9 )),
423- Arguments .of (UTF16_CODE_UNITS , 34 , "." + repeat (CHAR_UTF8_69B , 2 ), "." + repeat (CHAR_UTF8_69B , 1 )),
424433 // Names with extensions
425434 Arguments .of (UTF16_CODE_UNITS , 13 , repeat (CHAR_UTF8_1B , 10 ) + ".txt" , repeat (CHAR_UTF8_1B , 9 ) + ".txt" ),
426435 Arguments .of (UTF16_CODE_UNITS , 13 , repeat (CHAR_UTF8_2B , 10 ) + ".txt" , repeat (CHAR_UTF8_2B , 9 ) + ".txt" ),
@@ -438,8 +447,8 @@ static Stream<Arguments> testNameLengthStrategyTruncate_Succeeds() {
438447 // Grapheme cluster
439448 Arguments .of (UTF16_CODE_UNITS , 31 , CHAR_UTF8_69B , CHAR_UTF8_69B ),
440449 // Will not cut 2 or 7 UTF-16 code units of the grapheme cluster
441- Arguments .of (UTF16_CODE_UNITS , 31 + 2 , repeat (CHAR_UTF8_69B , 2 ), repeat ( CHAR_UTF8_69B , 1 ) ),
442- Arguments .of (UTF16_CODE_UNITS , 31 + 7 , repeat (CHAR_UTF8_69B , 2 ), repeat ( CHAR_UTF8_69B , 1 ) ));
450+ Arguments .of (UTF16_CODE_UNITS , 31 + 2 , repeat (CHAR_UTF8_69B , 2 ), CHAR_UTF8_69B + woman ),
451+ Arguments .of (UTF16_CODE_UNITS , 31 + 7 , repeat (CHAR_UTF8_69B , 2 ), CHAR_UTF8_69B + redHeadWoman ));
443452 }
444453
445454 @ ParameterizedTest (name = "{index}: {0} truncates {1} to {2}" )
@@ -450,7 +459,7 @@ void testNameLengthStrategyTruncate_Succeeds(NameLengthStrategy strategy, int li
450459 }
451460
452461 static Stream <Arguments > testNameLengthStrategyTruncate_Throws () {
453- return Stream .of (
462+ final Stream < Arguments > common = Stream .of (
454463 // Encoding issues
455464 Arguments .of (BYTES , 10 , "café" , US_ASCII , "US-ASCII" ),
456465 Arguments .of (UTF16_CODE_UNITS , 10 , "\uD800 .txt" , UTF_8 , "UTF-16" ),
@@ -460,9 +469,16 @@ static Stream<Arguments> testNameLengthStrategyTruncate_Throws() {
460469 Arguments .of (UTF16_CODE_UNITS , 4 , "a.txt" , UTF_8 , "extension" ),
461470 // Limit too small
462471 Arguments .of (BYTES , 3 , CHAR_UTF8_4B , UTF_8 , "truncated to 1 character" ),
463- Arguments .of (BYTES , 68 , CHAR_UTF8_69B , UTF_8 , "truncated to 29 characters" ),
464- Arguments .of (UTF16_CODE_UNITS , 1 , CHAR_UTF8_4B , UTF_8 , "truncated to 1 character" ),
465- Arguments .of (UTF16_CODE_UNITS , 30 , CHAR_UTF8_69B , UTF_8 , "truncated to 30 characters" ));
472+ Arguments .of (UTF16_CODE_UNITS , 1 , CHAR_UTF8_4B , UTF_8 , "truncated to 1 character" ));
473+ return SystemUtils .isJavaVersionAtMost (JavaVersion .JAVA_19 )
474+ ? common
475+ : Stream .concat (
476+ common ,
477+ // In JDK 20+ the grapheme cluster CHAR_UTF8_69B is treated as a single character,
478+ // so cannot be truncated to 2 or 7 code units
479+ Stream .of (
480+ Arguments .of (BYTES , 68 , CHAR_UTF8_69B , UTF_8 , "truncated to 29 characters" ),
481+ Arguments .of (UTF16_CODE_UNITS , 30 , CHAR_UTF8_69B , UTF_8 , "truncated to 30 characters" )));
466482 }
467483
468484 @ ParameterizedTest (name = "{index}: {0} truncates {2} with limit {1} throws" )
0 commit comments