Skip to content

Commit 31bd3e9

Browse files
committed
[feature] Improve EXPath File Module for W3C XQTS compliance
Add missing function overloads:
- file:read-text / file:read-text-lines: 3-arg form with $fallback
- file:create-temp-dir / file:create-temp-file: 2-arg form

Fix error codes per the EXPath File 4.0 spec:
- file:copy and file:move raise file:no-dir when the target's parent directory is missing
- file:create-dir raises file:exists when a path component is an existing file
- file:read-binary rejects a negative $length with file:out-of-range
- file:write-binary validates $offset against the file size

Fix readBinary hang: replace BinaryValueFromInputStream (which uses the CachingFilterInputStream/FilterInputStreamCacheMonitor infrastructure that prevents a clean BrokerPool shutdown) with BinaryValueFromBinaryString. The file is read into a byte[], base64-encoded, and wrapped in a lightweight value type with no open handles and a no-op close(). Tradeoff: ~2.4x memory for the file content, which is acceptable for typical file module use cases.

Resolve relative paths against the XQuery static base URI when it is set to a file: URI, falling back to the JVM working directory otherwise.

Detect XML-illegal characters in file:read-text and file:read-text-lines: raise file:io-error by default, or replace them with U+FFFD when $fallback is true.

QT4 XQTS expath-file results: 183/190 passing (96.3%), 0 hangs, 0 errors.
1 parent dc7166f commit 31bd3e9

File tree

9 files changed

+246
-47
lines changed

9 files changed

+246
-47
lines changed

extensions/expath/src/main/java/org/expath/exist/file/ExpathFileModule.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ public class ExpathFileModule extends AbstractInternalModule {
5050
new FunctionDef(FileProperties.signatures[6], FileProperties.class),
5151

5252
// FileIO: read-text(1), read-text(2), read-text-lines(1), read-text-lines(2),
53+
// read-text(3-fallback), read-text-lines(3-fallback),
5354
// read-binary(1), read-binary(2), read-binary(3)
5455
new FunctionDef(FileIO.signatures[0], FileIO.class),
5556
new FunctionDef(FileIO.signatures[1], FileIO.class),
@@ -58,6 +59,8 @@ public class ExpathFileModule extends AbstractInternalModule {
5859
new FunctionDef(FileIO.signatures[4], FileIO.class),
5960
new FunctionDef(FileIO.signatures[5], FileIO.class),
6061
new FunctionDef(FileIO.signatures[6], FileIO.class),
62+
new FunctionDef(FileIO.signatures[7], FileIO.class),
63+
new FunctionDef(FileIO.signatures[8], FileIO.class),
6164

6265
// FileWrite: write(2), write(3), write-text(2), write-text(3),
6366
// write-text-lines(2), write-text-lines(3), write-binary(2), write-binary(3)
@@ -81,7 +84,9 @@ public class ExpathFileModule extends AbstractInternalModule {
8184
new FunctionDef(FileAppend.signatures[6], FileAppend.class),
8285

8386
// FileManipulation: copy, move, delete(1), delete(2), create-dir,
84-
// create-temp-dir, create-temp-file, list(1), list(2), list(3),
87+
// create-temp-dir(2), create-temp-dir(3),
88+
// create-temp-file(2), create-temp-file(3),
89+
// list(1), list(2), list(3),
8590
// children, descendants, list-roots
8691
new FunctionDef(FileManipulation.signatures[0], FileManipulation.class),
8792
new FunctionDef(FileManipulation.signatures[1], FileManipulation.class),
@@ -96,6 +101,8 @@ public class ExpathFileModule extends AbstractInternalModule {
96101
new FunctionDef(FileManipulation.signatures[10], FileManipulation.class),
97102
new FunctionDef(FileManipulation.signatures[11], FileManipulation.class),
98103
new FunctionDef(FileManipulation.signatures[12], FileManipulation.class),
104+
new FunctionDef(FileManipulation.signatures[13], FileManipulation.class),
105+
new FunctionDef(FileManipulation.signatures[14], FileManipulation.class),
99106

100107
// FilePaths: name, parent, path-to-native, path-to-uri, resolve-path(1), resolve-path(2)
101108
new FunctionDef(FilePaths.signatures[0], FilePaths.class),

extensions/expath/src/main/java/org/expath/exist/file/ExpathFileModuleHelper.java

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,19 +58,57 @@ public static void checkDbaRole(final XQueryContext context, final Expression ex
5858

5959
/**
6060
* Resolve a path string (file: URI or native path) to a {@link Path}.
61+
* Relative paths are resolved against the JVM working directory.
6162
*
6263
* @param path the path string or file: URI
6364
* @param expression the calling expression (for error reporting)
6465
* @return the resolved Path
6566
* @throws XPathException if the path is invalid
6667
*/
6768
public static Path getPath(final String path, final Expression expression) throws XPathException {
69+
return getPath(path, expression, null);
70+
}
71+
72+
/**
73+
* Resolve a path string (file: URI or native path) to a {@link Path}.
74+
* Relative paths are resolved against the XQuery static base URI if it is a
75+
* file: URI, otherwise against the JVM working directory.
76+
*
77+
* @param path the path string or file: URI
78+
* @param expression the calling expression (for error reporting)
79+
* @param context the XQuery context (may be null)
80+
* @return the resolved Path
81+
* @throws XPathException if the path is invalid
82+
*/
83+
public static Path getPath(final String path, final Expression expression, final XQueryContext context) throws XPathException {
6884
try {
6985
if (path.startsWith("file:")) {
7086
return Paths.get(new URI(path));
71-
} else {
72-
return Paths.get(path);
7387
}
88+
89+
final Path p = Paths.get(path);
90+
if (p.isAbsolute()) {
91+
return p;
92+
}
93+
94+
// Resolve relative paths against static base URI if available
95+
if (context != null) {
96+
try {
97+
final String baseUri = context.getBaseURI().getStringValue();
98+
if (baseUri != null && baseUri.startsWith("file:")) {
99+
final Path basePath = Paths.get(new URI(baseUri));
100+
// Base URI may point to a file; resolve against its parent directory
101+
final Path baseDir = java.nio.file.Files.isDirectory(basePath) ? basePath : basePath.getParent();
102+
if (baseDir != null) {
103+
return baseDir.resolve(p);
104+
}
105+
}
106+
} catch (final Exception ignored) {
107+
// Fall through to default resolution
108+
}
109+
}
110+
111+
return p;
74112
} catch (final InvalidPathException e) {
75113
throw new XPathException(expression, ExpathFileErrorCode.INVALID_PATH,
76114
"Invalid path: " + path + " - " + e.getMessage());

extensions/expath/src/main/java/org/expath/exist/file/FileAppend.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro
138138
ExpathFileModuleHelper.checkDbaRole(context, this);
139139

140140
final String pathStr = args[0].getStringValue();
141-
final Path path = ExpathFileModuleHelper.getPath(pathStr, this);
141+
final Path path = ExpathFileModuleHelper.getPath(pathStr, this, context);
142142

143143
checkParentDir(path);
144144

extensions/expath/src/main/java/org/expath/exist/file/FileIO.java

Lines changed: 124 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
import org.exist.xquery.XPathException;
3737
import org.exist.xquery.XQueryContext;
3838
import org.exist.xquery.value.Base64BinaryValueType;
39-
import org.exist.xquery.value.BinaryValueFromInputStream;
39+
import org.exist.xquery.value.BinaryValueFromBinaryString;
4040
import org.exist.xquery.value.FunctionParameterSequenceType;
4141
import org.exist.xquery.value.FunctionReturnSequenceType;
4242
import org.exist.xquery.value.Sequence;
@@ -88,6 +88,24 @@ public class FileIO extends BasicFunction {
8888
new SequenceType[]{FILE_PARAM, ENCODING_PARAM},
8989
new FunctionReturnSequenceType(Type.STRING, Cardinality.ZERO_OR_MORE, "the lines of the file.")
9090
),
91+
// file:read-text($file as xs:string, $encoding as xs:string?, $fallback as xs:boolean) as xs:string
92+
new FunctionSignature(
93+
new QName("read-text", ExpathFileModule.NAMESPACE_URI, ExpathFileModule.PREFIX),
94+
"Reads the contents of a file as text. If $fallback is true, invalid characters are replaced.",
95+
new SequenceType[]{FILE_PARAM, ENCODING_PARAM,
96+
new FunctionParameterSequenceType("fallback", Type.BOOLEAN, Cardinality.EXACTLY_ONE,
97+
"If true, replace invalid characters with the Unicode replacement character.")},
98+
new FunctionReturnSequenceType(Type.STRING, Cardinality.EXACTLY_ONE, "the file contents as string.")
99+
),
100+
// file:read-text-lines($file as xs:string, $encoding as xs:string?, $fallback as xs:boolean) as xs:string*
101+
new FunctionSignature(
102+
new QName("read-text-lines", ExpathFileModule.NAMESPACE_URI, ExpathFileModule.PREFIX),
103+
"Reads the contents of a file as a sequence of lines. If $fallback is true, invalid characters are replaced.",
104+
new SequenceType[]{FILE_PARAM, ENCODING_PARAM,
105+
new FunctionParameterSequenceType("fallback", Type.BOOLEAN, Cardinality.EXACTLY_ONE,
106+
"If true, replace invalid characters with the Unicode replacement character.")},
107+
new FunctionReturnSequenceType(Type.STRING, Cardinality.ZERO_OR_MORE, "the lines of the file.")
108+
),
91109
// file:read-binary($file as xs:string) as xs:base64Binary
92110
new FunctionSignature(
93111
new QName("read-binary", ExpathFileModule.NAMESPACE_URI, ExpathFileModule.PREFIX),
@@ -130,7 +148,7 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro
130148
ExpathFileModuleHelper.checkDbaRole(context, this);
131149

132150
final String pathStr = args[0].getStringValue();
133-
final Path path = ExpathFileModuleHelper.getPath(pathStr, this);
151+
final Path path = ExpathFileModuleHelper.getPath(pathStr, this, context);
134152

135153
if (!Files.exists(path)) {
136154
throw new XPathException(this, ExpathFileErrorCode.NOT_FOUND,
@@ -154,20 +172,27 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro
154172

155173
private Sequence readText(final Path path, final Sequence[] args) throws XPathException {
156174
final Charset encoding = getEncoding(args, 1);
175+
final boolean fallback = args.length > 2 && !args[2].isEmpty()
176+
&& args[2].itemAt(0).toJavaObject(Boolean.class);
157177
try {
158-
final String content = Files.readString(path, encoding);
178+
final String content = readFileText(path, encoding, fallback);
159179
// Normalize newlines per spec: CR or CRLF -> LF
160180
final String normalized = content.replace("\r\n", "\n").replace("\r", "\n");
161181
return new StringValue(this, normalized);
182+
} catch (final java.nio.charset.MalformedInputException e) {
183+
throw new XPathException(this, ExpathFileErrorCode.IO_ERROR,
184+
"Invalid characters in file for encoding " + encoding.name());
162185
} catch (final IOException e) {
163186
throw new XPathException(this, ExpathFileErrorCode.IO_ERROR, e.getMessage());
164187
}
165188
}
166189

167190
private Sequence readTextLines(final Path path, final Sequence[] args) throws XPathException {
168191
final Charset encoding = getEncoding(args, 1);
192+
final boolean fallback = args.length > 2 && !args[2].isEmpty()
193+
&& args[2].itemAt(0).toJavaObject(Boolean.class);
169194
try {
170-
final String content = Files.readString(path, encoding);
195+
final String content = readFileText(path, encoding, fallback);
171196
// Split at newline boundaries per spec
172197
final String[] lines = content.split("\r\n|\r|\n", -1);
173198
final ValueSequence result = new ValueSequence(lines.length);
@@ -177,48 +202,116 @@ private Sequence readTextLines(final Path path, final Sequence[] args) throws XP
177202
result.add(new StringValue(this, lines[i]));
178203
}
179204
return result;
205+
} catch (final java.nio.charset.MalformedInputException e) {
206+
throw new XPathException(this, ExpathFileErrorCode.IO_ERROR,
207+
"Invalid characters in file for encoding " + encoding.name());
180208
} catch (final IOException e) {
181209
throw new XPathException(this, ExpathFileErrorCode.IO_ERROR, e.getMessage());
182210
}
183211
}
184212

185213
private Sequence readBinary(final Path path, final Sequence[] args) throws XPathException {
186214
final long offset = args.length > 1 && !args[1].isEmpty() ? args[1].itemAt(0).toJavaObject(Long.class) : 0;
187-
final long length = args.length > 2 && !args[2].isEmpty() ? args[2].itemAt(0).toJavaObject(Long.class) : -1;
215+
final boolean hasLength = args.length > 2 && !args[2].isEmpty();
216+
final long length = hasLength ? args[2].itemAt(0).toJavaObject(Long.class) : -1;
188217

189218
try {
190219
final long fileSize = Files.size(path);
191-
if (offset < 0 || offset > fileSize) {
192-
throw new XPathException(this, ExpathFileErrorCode.OUT_OF_RANGE,
193-
"Offset " + offset + " is out of range for file of size " + fileSize);
194-
}
195-
if (length < -1) {
196-
throw new XPathException(this, ExpathFileErrorCode.OUT_OF_RANGE,
197-
"Length must not be negative: " + length);
198-
}
199-
if (length >= 0 && offset + length > fileSize) {
200-
throw new XPathException(this, ExpathFileErrorCode.OUT_OF_RANGE,
201-
"Offset + length exceeds file size: " + (offset + length) + " > " + fileSize);
202-
}
220+
validateBinaryRange(offset, length, hasLength, fileSize);
221+
222+
final byte[] data = readBinaryData(path, offset, hasLength, length, fileSize);
223+
final String base64 = java.util.Base64.getEncoder().encodeToString(data);
224+
return new BinaryValueFromBinaryString(this, new Base64BinaryValueType(), base64);
225+
} catch (final IOException e) {
226+
throw new XPathException(this, ExpathFileErrorCode.IO_ERROR, e.getMessage());
227+
}
228+
}
203229

204-
if (offset == 0 && length < 0) {
205-
// Read entire file
206-
final InputStream is = Files.newInputStream(path);
207-
return BinaryValueFromInputStream.getInstance(context, new Base64BinaryValueType(), is, this);
230+
private void validateBinaryRange(final long offset, final long length, final boolean hasLength, final long fileSize) throws XPathException {
231+
if (offset < 0 || offset > fileSize) {
232+
throw new XPathException(this, ExpathFileErrorCode.OUT_OF_RANGE,
233+
"Offset " + offset + " is out of range for file of size " + fileSize);
234+
}
235+
if (hasLength && length < 0) {
236+
throw new XPathException(this, ExpathFileErrorCode.OUT_OF_RANGE,
237+
"Length must not be negative: " + length);
238+
}
239+
if (hasLength && offset + length > fileSize) {
240+
throw new XPathException(this, ExpathFileErrorCode.OUT_OF_RANGE,
241+
"Offset + length exceeds file size: " + (offset + length) + " > " + fileSize);
242+
}
243+
}
244+
245+
private byte[] readBinaryData(final Path path, final long offset, final boolean hasLength, final long length, final long fileSize) throws IOException {
246+
if (offset == 0 && !hasLength) {
247+
return Files.readAllBytes(path);
248+
}
249+
try (final RandomAccessFile raf = new RandomAccessFile(path.toFile(), "r")) {
250+
raf.seek(offset);
251+
final int readLen = hasLength ? (int) length : (int) (fileSize - offset);
252+
final byte[] data = new byte[readLen];
253+
raf.readFully(data);
254+
return data;
255+
}
256+
}
257+
258+
/**
259+
* Reads a file as text with the given encoding.
260+
* If fallback is true, malformed byte sequences and XML-illegal characters
261+
* are replaced with U+FFFD. Otherwise, an IOException is thrown if the file
262+
* contains malformed bytes or XML-illegal characters.
263+
*/
264+
private String readFileText(final Path path, final Charset encoding, final boolean fallback) throws IOException {
265+
final String content;
266+
if (fallback) {
267+
final java.nio.charset.CharsetDecoder decoder = encoding.newDecoder()
268+
.onMalformedInput(java.nio.charset.CodingErrorAction.REPLACE)
269+
.onUnmappableCharacter(java.nio.charset.CodingErrorAction.REPLACE)
270+
.replaceWith("\uFFFD");
271+
final byte[] bytes = Files.readAllBytes(path);
272+
content = decoder.decode(java.nio.ByteBuffer.wrap(bytes)).toString();
273+
// Replace XML-illegal characters with U+FFFD
274+
return replaceXmlIllegalChars(content);
275+
} else {
276+
content = Files.readString(path, encoding);
277+
// Check for XML-illegal characters
278+
checkXmlIllegalChars(content);
279+
return content;
280+
}
281+
}
282+
283+
/**
284+
* Check if a string contains characters illegal in XML 1.0 and throw IOException if so.
285+
* XML 1.0 allows: #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
286+
*/
287+
private void checkXmlIllegalChars(final String text) throws IOException {
288+
for (int i = 0; i < text.length(); i++) {
289+
final char c = text.charAt(i);
290+
if (c < 0x20 && c != 0x9 && c != 0xA && c != 0xD) {
291+
throw new IOException("File contains XML-illegal character U+" +
292+
String.format("%04X", (int) c) + " at position " + i);
293+
}
294+
if (c >= 0xFFFE) {
295+
throw new IOException("File contains XML-illegal character U+" +
296+
String.format("%04X", (int) c) + " at position " + i);
208297
}
298+
}
299+
}
209300

210-
// Partial read
211-
try (final RandomAccessFile raf = new RandomAccessFile(path.toFile(), "r")) {
212-
raf.seek(offset);
213-
final int readLen = length >= 0 ? (int) length : (int) (fileSize - offset);
214-
final byte[] data = new byte[readLen];
215-
raf.readFully(data);
216-
final InputStream bis = new java.io.ByteArrayInputStream(data);
217-
return BinaryValueFromInputStream.getInstance(context, new Base64BinaryValueType(), bis, this);
301+
/**
302+
* Replace characters illegal in XML 1.0 with U+FFFD.
303+
*/
304+
private String replaceXmlIllegalChars(final String text) {
305+
final StringBuilder sb = new StringBuilder(text.length());
306+
for (int i = 0; i < text.length(); i++) {
307+
final char c = text.charAt(i);
308+
if ((c < 0x20 && c != 0x9 && c != 0xA && c != 0xD) || c >= 0xFFFE) {
309+
sb.append('\uFFFD');
310+
} else {
311+
sb.append(c);
218312
}
219-
} catch (final IOException e) {
220-
throw new XPathException(this, ExpathFileErrorCode.IO_ERROR, e.getMessage());
221313
}
314+
return sb.toString();
222315
}
223316

224317
private Charset getEncoding(final Sequence[] args, final int index) throws XPathException {

0 commit comments

Comments
 (0)