1717
1818package org .apache .commons .io ;
1919
20+ import java .nio .ByteBuffer ;
21+ import java .nio .CharBuffer ;
22+ import java .nio .charset .CharacterCodingException ;
23+ import java .nio .charset .Charset ;
24+ import java .nio .charset .CharsetEncoder ;
25+ import java .nio .charset .CoderResult ;
26+ import java .nio .charset .CodingErrorAction ;
2027import java .util .Arrays ;
2128import java .util .Locale ;
2229import java .util .Objects ;
@@ -36,7 +43,12 @@ public enum FileSystem {
3643 /**
3744 * Generic file system.
3845 */
39- GENERIC (4096 , false , false , Integer .MAX_VALUE , Integer .MAX_VALUE , new int [] { 0 }, new String [] {}, false , false , '/' ),
46+ GENERIC (4096 , false , false , 1020 , 1024 * 1024 , new int [] {
47+ // @formatter:off
48+ // ASCII NUL
49+ 0
50+ // @formatter:on
51+ }, new String [] {}, false , false , '/' , LengthUnit .BYTES ),
4052
4153 /**
4254 * Linux file system.
@@ -48,7 +60,7 @@ public enum FileSystem {
4860 0 ,
4961 '/'
5062 // @formatter:on
51- }, new String [] {}, false , false , '/' ),
63+ }, new String [] {}, false , false , '/' , LengthUnit . BYTES ),
5264
5365 /**
5466 * MacOS file system.
@@ -61,7 +73,7 @@ public enum FileSystem {
6173 '/' ,
6274 ':'
6375 // @formatter:on
64- }, new String [] {}, false , false , '/' ),
76+ }, new String [] {}, false , false , '/' , LengthUnit . CHARS ),
6577
6678 /**
6779 * Windows file system.
@@ -78,7 +90,7 @@ public enum FileSystem {
7890 */
7991 // @formatter:off
8092 WINDOWS (4096 , false , true ,
81- 255 , 32000 , // KEEP THIS ARRAY SORTED!
93+ 255 , 32767 , // KEEP THIS ARRAY SORTED!
8294 new int [] {
8395 // KEEP THIS ARRAY SORTED!
8496 // ASCII NUL
@@ -95,7 +107,7 @@ public enum FileSystem {
95107 "LPT1" , "LPT2" , "LPT3" , "LPT4" , "LPT5" , "LPT6" , "LPT7" , "LPT8" , "LPT9" ,
96108 "LPT\u00b2 " , "LPT\u00b3 " , "LPT\u00b9 " , // Superscript 2 3 1 in that order
97109 "NUL" , "PRN"
98- }, true , true , '\\' );
110+ }, true , true , '\\' , LengthUnit . CHARS );
99111 // @formatter:on
100112
101113 /**
@@ -315,6 +327,7 @@ private static String replace(final String path, final char oldChar, final char
315327 private final boolean supportsDriveLetter ;
316328 private final char nameSeparator ;
317329 private final char nameSeparatorOther ;
330+ private final LengthUnit lengthUnit ;
318331
319332 /**
320333 * Constructs a new instance.
@@ -329,10 +342,12 @@ private static String replace(final String path, final char oldChar, final char
329342 * @param reservedFileNamesExtensions TODO
330343 * @param supportsDriveLetter Whether this file system support driver letters.
331344 * @param nameSeparator The name separator, '\\' on Windows, '/' on Linux.
345+ * @param lengthUnit The unit of measurement for length limits.
332346 */
333347 FileSystem (final int blockSize , final boolean caseSensitive , final boolean casePreserving ,
334348 final int maxFileLength , final int maxPathLength , final int [] illegalFileNameChars ,
335- final String [] reservedFileNames , final boolean reservedFileNamesExtensions , final boolean supportsDriveLetter , final char nameSeparator ) {
349+ final String [] reservedFileNames , final boolean reservedFileNamesExtensions , final boolean supportsDriveLetter ,
350+ final char nameSeparator , final LengthUnit lengthUnit ) {
336351 this .blockSize = blockSize ;
337352 this .maxFileNameLength = maxFileLength ;
338353 this .maxPathLength = maxPathLength ;
@@ -345,6 +360,7 @@ private static String replace(final String path, final char oldChar, final char
345360 this .supportsDriveLetter = supportsDriveLetter ;
346361 this .nameSeparator = nameSeparator ;
347362 this .nameSeparatorOther = FilenameUtils .flipSeparator (nameSeparator );
363+ this .lengthUnit = lengthUnit ;
348364 }
349365
350366 /**
@@ -380,23 +396,45 @@ public int[] getIllegalFileNameCodePoints() {
380396 }
381397
382398 /**
383- * Gets the maximum length for file names. The file name does not include folders.
399+ * Gets the maximum length for file names (excluding any folder path).
400+ *
401+ * <p><strong>Note:</strong> This excludes any folder path. The unit depends on the
402+ * filesystem or OS; see {@link #getLengthUnit()} to check whether the value is in
403+ * bytes or UTF-16 characters.</p>
384404 *
385- * @return the maximum length for file names .
405+ * @return the maximum file name length .
386406 */
387407 public int getMaxFileNameLength () {
388408 return maxFileNameLength ;
389409 }
390410
391411 /**
392- * Gets the maximum length of the path to a file. This can include folders.
412+ * Gets the maximum length for file paths (may include folders) .
393413 *
394- * @return the maximum length of the path to a file.
414+ * <p><strong>Note:</strong> This may include folder names as well as the file name.
415+ * The unit is the same as {@link #getMaxFileNameLength()} and can be obtained
416+ * from {@link #getLengthUnit()}.</p>
417+ *
418+ * @return the maximum file path length.
395419 */
396420 public int getMaxPathLength () {
397421 return maxPathLength ;
398422 }
399423
424+ /**
425+ * Gets the unit of measurement for length limits.
426+ *
427+ * <p>Depending on the platform, limits may be expressed in bytes or in UTF-16
428+ * characters.</p>
429+ *
430+ * @return the unit for file name and path length limits.
431+ * @since 2.21.0
432+ */
433+ public LengthUnit getLengthUnit () {
434+ return lengthUnit ;
435+ }
436+
437+
400438 /**
401439 * Gets the name separator, '\\' on Windows, '/' on Linux.
402440 *
@@ -446,16 +484,42 @@ private boolean isIllegalFileNameChar(final int c) {
446484 }
447485
448486 /**
449- * Tests if a candidate file name (without a path) such as {@code "filename.ext"} or {@code "filename"} is a
450- * potentially legal file name. If the file name length exceeds {@link #getMaxFileNameLength()}, or if it contains
451- * an illegal character then the check fails.
487+ * Tests if a candidate file name (without a path) is a legal file name.
488+ *
489+ * <p>Takes a file name like {@code "filename.ext"} or {@code "filename"} and checks:</p>
490+ * <ul>
491+ * <li>if the file name length is legal</li>
492+ * <li>if the file name is not a reserved file name</li>
493+ * <li>if the file name does not contain illegal characters</li>
494+ * </ul>
452495 *
453496 * @param candidate
454- * a candidate file name (without a path) like {@code "filename.ext"} or {@code "filename"}
497+ * A candidate file name (without a path) like {@code "filename.ext"} or {@code "filename"}
455498 * @return {@code true} if the candidate name is legal
456499 */
457500 public boolean isLegalFileName (final CharSequence candidate ) {
458- if (candidate == null || candidate .length () == 0 || candidate .length () > maxFileNameLength ) {
501+ return isLegalFileName (candidate , Charset .defaultCharset ());
502+ }
503+
504+ /**
505+ * Tests if a candidate file name (without a path) is a legal file name.
506+ *
507+ * <p>Takes a file name like {@code "filename.ext"} or {@code "filename"} and checks:</p>
508+ * <ul>
509+ * <li>if the file name length is legal</li>
510+ * <li>if the file name is not a reserved file name</li>
511+ * <li>if the file name does not contain illegal characters</li>
512+ * </ul>
513+ *
514+ * @param candidate
515+ * A candidate file name (without a path) like {@code "filename.ext"} or {@code "filename"}
516+ * @param charset
517+ * The charset to use when the file name length is measured in bytes
518+ * @return {@code true} if the candidate name is legal
519+ * @since 2.21.0
520+ */
521+ public boolean isLegalFileName (final CharSequence candidate , final Charset charset ) {
522+ if (!isLegalFileLength (candidate , charset )) {
459523 return false ;
460524 }
461525 if (isReservedFileName (candidate )) {
@@ -504,24 +568,53 @@ public boolean supportsDriveLetter() {
504568 }
505569
506570 /**
507- * Converts a candidate file name (without a path) like {@code "filename.ext"} or {@code "filename"} to a legal file
508- * name. Illegal characters in the candidate name are replaced by the {@code replacement} character. If the file
509- * name length exceeds {@link #getMaxFileNameLength()}, then the name is truncated to
510- * {@link #getMaxFileNameLength()}.
571+ * Converts a candidate file name (without a path) to a legal file name.
572+ *
573+ * <p>Takes a file name like {@code "filename.ext"} or {@code "filename"} and:</p>
574+ * <ul>
575+ * <li>replaces illegal characters by the given replacement character</li>
576+ * <li>truncates the name to {@link #getMaxFileNameLength()} if necessary</li>
577+ * </ul>
511578 *
512579 * @param candidate
513- * a candidate file name (without a path) like {@code "filename.ext"} or {@code "filename"}
580+ * A candidate file name (without a path) like {@code "filename.ext"} or {@code "filename"}
514581 * @param replacement
515582 * Illegal characters in the candidate name are replaced by this character
516583 * @return a String without illegal characters
517584 */
518585 public String toLegalFileName (final String candidate , final char replacement ) {
586+ return toLegalFileName (candidate , replacement , Charset .defaultCharset ());
587+ }
588+
589+ /**
590+ * Converts a candidate file name (without a path) to a legal file name.
591+ *
592+ * <p>Takes a file name like {@code "filename.ext"} or {@code "filename"} and:</p>
593+ * <ul>
594+ * <li>replaces illegal characters by the given replacement character</li>
595+ * <li>truncates the name to {@link #getMaxFileNameLength()} if necessary</li>
596+ * </ul>
597+ *
598+ * @param candidate
599+ * A candidate file name (without a path) like {@code "filename.ext"} or {@code "filename"}
600+ * @param replacement
601+ * Illegal characters in the candidate name are replaced by this character
602+ * @param charset
603+ * The charset to use when the file name length is measured in bytes
604+ * @return a String without illegal characters
605+ * @since 2.21.0
606+ */
607+ public String toLegalFileName (final String candidate , final char replacement , final Charset charset ) {
608+ Objects .requireNonNull (candidate , "candidate" );
609+ if (candidate .isEmpty ()) {
610+ throw new IllegalArgumentException ("The candidate file name is empty" );
611+ }
519612 if (isIllegalFileNameChar (replacement )) {
520613 // %s does not work properly with NUL
521614 throw new IllegalArgumentException (String .format ("The replacement character '%s' cannot be one of the %s illegal characters: %s" ,
522615 replacement == '\0' ? "\\ 0" : replacement , name (), Arrays .toString (illegalFileNameChars )));
523616 }
524- final String truncated = candidate . length () > maxFileNameLength ? candidate . substring ( 0 , maxFileNameLength ) : candidate ;
617+ final CharSequence truncated = truncateFileName ( candidate , charset ) ;
525618 final int [] array = truncated .chars ().map (i -> isIllegalFileNameChar (i ) ? replacement : i ).toArray ();
526619 return new String (array , 0 , array .length );
527620 }
@@ -530,4 +623,76 @@ CharSequence trimExtension(final CharSequence cs) {
530623 final int index = indexOf (cs , '.' , 0 );
531624 return index < 0 ? cs : cs .subSequence (0 , index );
532625 }
626+
627+ private boolean isLegalFileLength (final CharSequence candidate , final Charset charset ) {
628+ if (candidate == null || candidate .length () == 0 ) {
629+ return false ;
630+ }
631+ if (lengthUnit == LengthUnit .CHARS ) {
632+ return candidate .length () <= getMaxFileNameLength ();
633+ }
634+ final CharsetEncoder encoder = charset .newEncoder ();
635+ try {
636+ final ByteBuffer buffer = encoder .encode (CharBuffer .wrap (candidate ));
637+ return buffer .remaining () <= getMaxFileNameLength ();
638+ } catch (CharacterCodingException e ) {
639+ // If we can't encode, it's not legal
640+ return false ;
641+ }
642+ }
643+
644+ CharSequence truncateFileName (final CharSequence candidate , final Charset charset ) {
645+ final int maxFileNameLength = getMaxFileNameLength ();
646+ // Character-based limit: simple substring if needed.
647+ if (lengthUnit == LengthUnit .CHARS ) {
648+ return candidate .length () <= maxFileNameLength ? candidate : candidate .subSequence (0 , maxFileNameLength );
649+ }
650+
651+ // Byte-based limit
652+ return truncateByBytes (candidate , charset , maxFileNameLength );
653+ }
654+
655+ static CharSequence truncateByBytes (final CharSequence candidate , final Charset charset , final int maxBytes ) {
656+ // Byte-based limit
657+ final CharsetEncoder encoder = charset .newEncoder ()
658+ .onMalformedInput (CodingErrorAction .REPORT )
659+ .onUnmappableCharacter (CodingErrorAction .REPORT );
660+
661+ if (!encoder .canEncode (candidate )) {
662+ throw new IllegalArgumentException (
663+ "File name contains characters that cannot be encoded with charset " + charset .name ());
664+ }
665+
666+ // Fast path: if even the worst-case expansion fits, we're done.
667+ if (candidate .length () <= Math .floor (maxBytes / encoder .maxBytesPerChar ())) {
668+ return candidate ;
669+ }
670+
671+ // Slow path: encode into a fixed-size byte buffer.
672+ final ByteBuffer out = ByteBuffer .allocate (maxBytes );
673+ final CharBuffer in = CharBuffer .wrap (candidate );
674+
675+ // Encode until the first character that would exceed the byte budget.
676+ final CoderResult cr = encoder .encode (in , out , true );
677+
678+ if (cr .isUnderflow ()) {
679+ // Entire candidate fit within maxFileNameLength bytes.
680+ return candidate ;
681+ }
682+
683+ // We ran out of space mid-encode: truncate BEFORE the offending character.
684+ return candidate .subSequence (0 , in .position ());
685+ }
686+
/**
 * Identifies how file name and path length limits are counted.
 *
 * @since 2.21.0
 */
public enum LengthUnit {

    /** The limit is a number of bytes. */
    BYTES,

    /** The limit is a number of UTF-16 characters. */
    CHARS
}
533698}
0 commit comments