Skip to content

Commit 5dd1dff

Browse files
committed
Update new API FileSystem.toLegalFileName(CharSequence, char, Charset)
to use a CharSequence instead of a String for input Add FileSystemTest.testXmlRoundtrip(FileSystem, Path)
1 parent de59d97 commit 5dd1dff

File tree

2 files changed

+109
-36
lines changed

2 files changed

+109
-36
lines changed

β€Žsrc/main/java/org/apache/commons/io/FileSystem.javaβ€Ž

Lines changed: 27 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -138,39 +138,30 @@ CharSequence truncate(final CharSequence value, final int limit, final Charset c
138138
final CharsetEncoder encoder = charset.newEncoder()
139139
.onMalformedInput(CodingErrorAction.REPORT)
140140
.onUnmappableCharacter(CodingErrorAction.REPORT);
141-
142141
if (!encoder.canEncode(value)) {
143-
throw new IllegalArgumentException(
144-
"The value " + value + " cannot be encoded using " + charset.name());
142+
throw new IllegalArgumentException("The value " + value + " cannot be encoded using " + charset.name());
145143
}
146-
147144
// Fast path: if even the worst-case expansion fits, we're done.
148145
if (value.length() <= Math.floor(limit / encoder.maxBytesPerChar())) {
149146
return value;
150147
}
151-
152148
// Slow path: encode into a fixed-size byte buffer.
153149
// 1. Compute length of extension in bytes (if any).
154150
final CharSequence[] parts = splitExtension(value);
155151
final int extensionLength = getLength(parts[1], charset);
156152
if (extensionLength > 0 && extensionLength >= limit) {
157153
// Extension itself does not fit
158-
throw new IllegalArgumentException(
159-
"The extension of " + value + " is too long to fit within " + limit + " bytes");
154+
throw new IllegalArgumentException("The extension of " + value + " is too long to fit within " + limit + " bytes");
160155
}
161-
162156
// 2. Compute the character part that fits within the remaining byte budget.
163157
final ByteBuffer byteBuffer = ByteBuffer.allocate(limit - extensionLength);
164158
final CharBuffer charBuffer = CharBuffer.wrap(parts[0]);
165-
166159
// Encode until the first character that would exceed the byte budget.
167160
final CoderResult cr = encoder.encode(charBuffer, byteBuffer, true);
168-
169161
if (cr.isUnderflow()) {
170162
// Entire candidate fit within maxFileNameLength bytes.
171163
return value;
172164
}
173-
174165
final CharSequence truncated = safeTruncate(value, charBuffer.position());
175166
return extensionLength == 0 ? truncated : truncated.toString() + parts[1];
176167
}
@@ -186,25 +177,20 @@ int getLength(final CharSequence value, final Charset charset) {
186177
@Override
187178
CharSequence truncate(final CharSequence value, final int limit, final Charset charset) {
188179
if (!UTF_16.newEncoder().canEncode(value)) {
189-
throw new IllegalArgumentException(
190-
"The value " + value + " can not be encoded using " + UTF_16.name());
180+
throw new IllegalArgumentException("The value " + value + " can not be encoded using " + UTF_16.name());
191181
}
192-
193182
// Fast path: no truncation needed.
194183
if (value.length() <= limit) {
195184
return value;
196185
}
197-
198186
// Slow path: truncate to limit.
199187
// 1. Compute length of extension in chars (if any).
200188
final CharSequence[] parts = splitExtension(value);
201189
final int extensionLength = parts[1].length();
202190
if (extensionLength > 0 && extensionLength >= limit) {
203191
// Extension itself does not fit
204-
throw new IllegalArgumentException(
205-
"The extension of " + value + " is too long to fit within " + limit + " characters");
192+
throw new IllegalArgumentException("The extension of " + value + " is too long to fit within " + limit + " characters");
206193
}
207-
208194
// 2. Truncate the non-extension part and append the extension (if any).
209195
final CharSequence truncated = safeTruncate(value, limit - extensionLength);
210196
return extensionLength == 0 ? truncated : truncated.toString() + parts[1];
@@ -560,6 +546,10 @@ public int getMaxPathLength() {
560546
return maxPathLength;
561547
}
562548

549+
NameLengthStrategy getNameLengthStrategy() {
550+
return nameLengthStrategy;
551+
}
552+
563553
/**
564554
* Gets the name separator, '\\' on Windows, '/' on Linux.
565555
*
@@ -703,12 +693,27 @@ public boolean supportsDriveLetter() {
703693
* A candidate file name (without a path) like {@code "filename.ext"} or {@code "filename"}.
704694
* @param replacement
705695
* Illegal characters in the candidate name are replaced by this character.
696+
* @param charset
697+
* The charset to use when the file name length is measured in bytes.
706698
* @return a String without illegal characters.
699+
* @since 2.21.0
707700
*/
708-
public String toLegalFileName(final String candidate, final char replacement) {
709-
return toLegalFileName(candidate, replacement, Charset.defaultCharset());
701+
public String toLegalFileName(final CharSequence candidate, final char replacement, final Charset charset) {
702+
Objects.requireNonNull(candidate, "candidate");
703+
if (candidate.length() == 0) {
704+
throw new IllegalArgumentException("The candidate file name is empty");
705+
}
706+
if (isIllegalFileNameChar(replacement)) {
707+
// %s does not work properly with NUL
708+
throw new IllegalArgumentException(String.format("The replacement character '%s' cannot be one of the %s illegal characters: %s",
709+
replacement == '\0' ? "\\0" : replacement, name(), Arrays.toString(illegalFileNameChars)));
710+
}
711+
final CharSequence truncated = nameLengthStrategy.truncate(candidate, getMaxFileNameLength(), charset);
712+
final int[] array = truncated.chars().map(i -> isIllegalFileNameChar(i) ? replacement : i).toArray();
713+
return new String(array, 0, array.length);
710714
}
711715

716+
712717
/**
713718
* Converts a candidate file name (without a path) to a legal file name.
714719
*
@@ -722,24 +727,10 @@ public String toLegalFileName(final String candidate, final char replacement) {
722727
* A candidate file name (without a path) like {@code "filename.ext"} or {@code "filename"}.
723728
* @param replacement
724729
* Illegal characters in the candidate name are replaced by this character.
725-
* @param charset
726-
* The charset to use when the file name length is measured in bytes.
727730
* @return a String without illegal characters.
728-
* @since 2.21.0
729731
*/
730-
public String toLegalFileName(final String candidate, final char replacement, final Charset charset) {
731-
Objects.requireNonNull(candidate, "candidate");
732-
if (candidate.isEmpty()) {
733-
throw new IllegalArgumentException("The candidate file name is empty");
734-
}
735-
if (isIllegalFileNameChar(replacement)) {
736-
// %s does not work properly with NUL
737-
throw new IllegalArgumentException(String.format("The replacement character '%s' cannot be one of the %s illegal characters: %s",
738-
replacement == '\0' ? "\\0" : replacement, name(), Arrays.toString(illegalFileNameChars)));
739-
}
740-
final CharSequence truncated = nameLengthStrategy.truncate(candidate, getMaxFileNameLength(), charset);
741-
final int[] array = truncated.chars().map(i -> isIllegalFileNameChar(i) ? replacement : i).toArray();
742-
return new String(array, 0, array.length);
732+
public String toLegalFileName(final String candidate, final char replacement) {
733+
return toLegalFileName(candidate, replacement, Charset.defaultCharset());
743734
}
744735

745736
}

β€Žsrc/test/java/org/apache/commons/io/FileSystemTest.javaβ€Ž

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,19 +27,28 @@
2727
import static org.junit.jupiter.api.Assertions.assertEquals;
2828
import static org.junit.jupiter.api.Assertions.assertFalse;
2929
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
30+
import static org.junit.jupiter.api.Assertions.assertNotNull;
3031
import static org.junit.jupiter.api.Assertions.assertNotSame;
3132
import static org.junit.jupiter.api.Assertions.assertThrows;
3233
import static org.junit.jupiter.api.Assertions.assertTrue;
3334

35+
import java.io.BufferedReader;
3436
import java.io.FileNotFoundException;
3537
import java.io.IOException;
38+
import java.io.StringReader;
3639
import java.nio.charset.Charset;
3740
import java.nio.file.Files;
3841
import java.nio.file.Path;
42+
import java.util.Objects;
3943
import java.util.stream.Stream;
4044

45+
import javax.xml.parsers.DocumentBuilder;
46+
import javax.xml.parsers.DocumentBuilderFactory;
47+
import javax.xml.parsers.ParserConfigurationException;
48+
4149
import org.apache.commons.io.FileSystem.NameLengthStrategy;
4250
import org.apache.commons.lang3.JavaVersion;
51+
import org.apache.commons.lang3.StringUtils;
4352
import org.apache.commons.lang3.SystemProperties;
4453
import org.apache.commons.lang3.SystemUtils;
4554
import org.junit.jupiter.api.Test;
@@ -48,6 +57,9 @@
4857
import org.junit.jupiter.params.provider.Arguments;
4958
import org.junit.jupiter.params.provider.EnumSource;
5059
import org.junit.jupiter.params.provider.MethodSource;
60+
import org.w3c.dom.Document;
61+
import org.xml.sax.InputSource;
62+
import org.xml.sax.SAXException;
5163

5264
/**
5365
* Tests {@link FileSystem}.
@@ -252,6 +264,20 @@ static Stream<Arguments> testNameLengthStrategyTruncate_Throws() {
252264
Arguments.of(UTF16_CODE_UNITS, 30, CHAR_UTF8_69B, UTF_8, "truncated to 30 characters")));
253265
}
254266

267+
private String parseXmlRootValue(final Path xmlPath, final Charset charset) throws SAXException, IOException, ParserConfigurationException {
268+
final DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
269+
try (BufferedReader reader = Files.newBufferedReader(xmlPath, charset)) {
270+
final Document document = builder.parse(new InputSource(reader));
271+
return document.getDocumentElement().getTextContent();
272+
}
273+
}
274+
275+
private String parseXmlRootValue(final String xmlString) throws SAXException, IOException, ParserConfigurationException {
276+
final DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
277+
final Document document = builder.parse(new InputSource(new StringReader(xmlString)));
278+
return document.getDocumentElement().getTextContent();
279+
}
280+
255281
@Test
256282
void testGetBlockSize() {
257283
assertTrue(FileSystem.getCurrent().getBlockSize() >= 0);
@@ -487,4 +513,60 @@ void testToLegalFileNameWindows() {
487513
assertThrows(IllegalArgumentException.class, () -> fs.toLegalFileName("test", '\0'));
488514
assertThrows(IllegalArgumentException.class, () -> fs.toLegalFileName("test", ':'));
489515
}
516+
517+
@ParameterizedTest
518+
@EnumSource(FileSystem.class)
519+
void testXmlRoundtrip(final FileSystem fs, @TempDir final Path tempDir) throws Exception {
520+
final Charset charset = Charset.defaultCharset();
521+
assertEquals("a", fs.toLegalFileName("a", '_', charset));
522+
assertEquals("abcdefghijklmno", fs.toLegalFileName("abcdefghijklmno", '_', charset));
523+
assertEquals("\u4F60\u597D\u55CE", fs.toLegalFileName("\u4F60\u597D\u55CE", '_', charset));
524+
assertEquals("\u2713\u2714", fs.toLegalFileName("\u2713\u2714", '_', charset));
525+
assertEquals("\uD83D\uDE80\u2728\uD83C\uDF89", fs.toLegalFileName("\uD83D\uDE80\u2728\uD83C\uDF89", '_', charset));
526+
assertEquals("\uD83D\uDE03", fs.toLegalFileName("\uD83D\uDE03", '_', charset));
527+
assertEquals("\uD83D\uDE03\uD83D\uDE03\uD83D\uDE03\uD83D\uDE03\uD83D\uDE03",
528+
fs.toLegalFileName("\uD83D\uDE03\uD83D\uDE03\uD83D\uDE03\uD83D\uDE03\uD83D\uDE03", '_', charset));
529+
for (int i = 1; i <= 100; i++) {
530+
final String name1 = fs.toLegalFileName(StringUtils.repeat("🦊", i), '_', charset);
531+
assertNotNull(name1);
532+
final byte[] name1Bytes = name1.getBytes();
533+
final String xmlString1 = toXmlString(name1, charset);
534+
final Path path = tempDir.resolve(name1);
535+
Files.write(path, xmlString1.getBytes(charset));
536+
final String xmlFromPath = parseXmlRootValue(path, charset);
537+
assertEquals(name1, xmlFromPath, "i = " + i);
538+
final String name2 = new String(name1Bytes, charset);
539+
assertEquals(name1, name2);
540+
final String xmlString2 = toXmlString(name2, charset);
541+
assertEquals(xmlString1, xmlString2);
542+
final String parsedValue = Objects.toString(parseXmlRootValue(xmlString2), "");
543+
assertEquals(name1, parsedValue, "i = " + i);
544+
assertEquals(name2, parsedValue, "i = " + i);
545+
}
546+
for (int i = 1; i <= 100; i++) {
547+
final String name1 = fs.toLegalFileName(fs.getNameLengthStrategy().truncate(
548+
"πŸ‘©πŸ»β€πŸ‘¨πŸ»β€πŸ‘¦πŸ»β€πŸ‘¦πŸ»πŸ‘©πŸΌβ€πŸ‘¨πŸΌβ€πŸ‘¦πŸΌβ€πŸ‘¦πŸΌπŸ‘©πŸ½β€πŸ‘¨πŸ½β€πŸ‘¦πŸ½β€πŸ‘¦πŸ½πŸ‘©πŸΎβ€πŸ‘¨πŸΎβ€πŸ‘¦πŸΎβ€πŸ‘¦πŸΎπŸ‘©πŸΏβ€πŸ‘¨πŸΏβ€πŸ‘¦πŸΏβ€πŸ‘¦πŸΏπŸ‘©πŸ»β€πŸ‘¨πŸ»β€πŸ‘¦πŸ»β€πŸ‘¦πŸ»πŸ‘©πŸΌβ€πŸ‘¨πŸΌβ€πŸ‘¦πŸΌβ€πŸ‘¦πŸΌπŸ‘©πŸ½β€πŸ‘¨πŸ½β€πŸ‘¦πŸ½β€πŸ‘¦πŸ½πŸ‘©πŸΎβ€πŸ‘¨πŸΎβ€πŸ‘¦πŸΎβ€πŸ‘¦πŸΎπŸ‘©πŸΏβ€πŸ‘¨πŸΏβ€πŸ‘¦πŸΏβ€πŸ‘¦πŸΏ",
549+
// TODO hack 100: truncate blows up when it can't.
550+
100 + i, charset), '_', charset);
551+
assertNotNull(name1);
552+
final byte[] name1Bytes = name1.getBytes();
553+
final String xmlString1 = toXmlString(name1, charset);
554+
final Path path = tempDir.resolve(name1);
555+
Files.write(path, xmlString1.getBytes(charset));
556+
final String xmlFromPath = parseXmlRootValue(path, charset);
557+
assertEquals(name1, xmlFromPath, "i = " + i);
558+
final String name2 = new String(name1Bytes, charset);
559+
assertEquals(name1, name2);
560+
final String xmlString2 = toXmlString(name2, charset);
561+
assertEquals(xmlString1, xmlString2);
562+
final String parsedValue = Objects.toString(parseXmlRootValue(xmlString2), "");
563+
assertEquals(name1, parsedValue, "i = " + i);
564+
assertEquals(name2, parsedValue, "i = " + i);
565+
}
566+
}
567+
568+
private String toXmlString(final String s, final Charset charset) {
569+
return String.format("<?xml version=\"1.0\" encoding=\"%s\"?><data>%s</data>", charset.name(), s);
570+
}
571+
490572
}

0 commit comments

Comments
Β (0)