Skip to content

Commit 957b72e

Browse files
committed
[bugfix] Compressing persistent Documents or Elements that have non-element Child Nodes previously caused silent corruption of those documents in the generated archive. This is now fixed so that all nodes are correctly compressed.
1 parent dd4a117 commit 957b72e

File tree

3 files changed

+163
-29
lines changed

3 files changed

+163
-29
lines changed

extensions/modules/compression/pom.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@
176176
<exclude>pom.xml</exclude>
177177
<exclude>src/test/resources-filtered/conf.xml</exclude>
178178
<exclude>src/test/resources/log4j2.xml</exclude>
179+
<exclude>src/test/xquery/modules/compression/zip-unzip-tests.xqm</exclude>
179180
<exclude>src/main/java/org/exist/xquery/modules/compression/AbstractCompressFunction.java</exclude>
180181
<exclude>src/main/java/org/exist/xquery/modules/compression/EntryFunctions.java</exclude>
181182
</excludes>

extensions/modules/compression/src/main/java/org/exist/xquery/modules/compression/AbstractCompressFunction.java

Lines changed: 79 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
import org.exist.xquery.value.*;
6767
import org.w3c.dom.Element;
6868
import org.w3c.dom.Node;
69+
import org.w3c.dom.NodeList;
6970
import org.xml.sax.SAXException;
7071

7172
import javax.annotation.Nullable;
@@ -310,13 +311,7 @@ private void compressElement(final OutputStream os, final Element element, final
310311
throw new XPathException(this, "Item must be type of xs:anyURI or element entry.");
311312
}
312313

313-
if (element.getChildNodes().getLength() > 1) {
314-
throw new XPathException(this, "Entry content is not valid XML fragment.");
315-
}
316-
317314
String name = element.getAttribute("name");
318-
// if(name == null)
319-
// throw new XPathException(this, "Entry must have name attribute.");
320315

321316
final String type = element.getAttribute("type");
322317
ZipMethod method;
@@ -327,7 +322,7 @@ private void compressElement(final OutputStream os, final Element element, final
327322
}
328323

329324
if ("uri".equals(type)) {
330-
@Nullable final String uri = element.getFirstChild().getNodeValue();
325+
@Nullable final String uri = getChildTextValues(element);
331326
if (isNullOrEmpty(uri)) {
332327
throw new XPathException(this, "Entry with type uri must contain a URI.");
333328
}
@@ -350,32 +345,57 @@ private void compressElement(final OutputStream os, final Element element, final
350345
entry = newEntry(name);
351346

352347
if (!"collection".equals(type)) {
353-
final byte[] value;
354-
final Node content = element.getFirstChild();
348+
final NodeList children = element.getChildNodes();
355349

356-
if (content == null) {
350+
final byte[] value;
351+
if (children.getLength() == 0) {
357352
value = new byte[0];
358353
} else {
359-
if (content.getNodeType() == Node.TEXT_NODE) {
360-
String text = content.getNodeValue();
361-
if ("binary".equals(type)) {
362-
//base64 binary
363-
value = Base64.decodeBase64(text);
364-
} else {
365-
//text
366-
value = text.getBytes();
354+
@Nullable Serializer serializer = null;
355+
try (final UnsynchronizedByteArrayOutputStream baos = new UnsynchronizedByteArrayOutputStream()) {
356+
int elementCount = 0;
357+
for (int i = 0; i < children.getLength(); i++) {
358+
final Node child = children.item(i);
359+
360+
if (child.getNodeType() == Node.TEXT_NODE) {
361+
final String text = child.getNodeValue();
362+
if ("binary".equals(type)) {
363+
// Base64 encoded binary
364+
baos.write(Base64.decodeBase64(text));
365+
} else {
366+
// Text
367+
baos.write(text.getBytes());
368+
}
369+
} else {
370+
if (child.getNodeType() == Node.ELEMENT_NODE) {
371+
elementCount++;
372+
if (elementCount > 1) {
373+
throw new XPathException(this, "More than one Element is not permitted within an Entry.");
374+
}
375+
}
376+
377+
// XML
378+
if (serializer == null) {
379+
serializer = context.getBroker().borrowSerializer();
380+
serializer.setUser(context.getSubject());
381+
serializer.setProperty("omit-xml-declaration", "no");
382+
getDynamicSerializerOptions(serializer);
383+
} else {
384+
serializer.reset();
385+
serializer.setUser(context.getSubject());
386+
getDynamicSerializerOptions(serializer);
387+
serializer.setProperty("omit-xml-declaration", "yes");
388+
}
389+
390+
sbWriter.getBuilder().setLength(0);
391+
serializer.serialize((NodeValue) child, sbWriter);
392+
baos.write(sbWriter.toString().getBytes(StandardCharsets.UTF_8));
393+
}
367394
}
368-
} else {
369-
//xml
370-
final Serializer serializer = context.getBroker().borrowSerializer();
371-
try {
372-
serializer.setUser(context.getSubject());
373-
serializer.setProperty("omit-xml-declaration", "no");
374-
getDynamicSerializerOptions(serializer);
375-
sbWriter.getBuilder().setLength(0);
376-
serializer.serialize((NodeValue) content, sbWriter);
377-
value = sbWriter.toString().getBytes(StandardCharsets.UTF_8);
378-
} finally {
395+
396+
value = baos.toByteArray();
397+
} finally {
398+
if (serializer != null) {
379399
context.getBroker().returnSerializer(serializer);
380400
}
381401
}
@@ -405,6 +425,36 @@ private void compressElement(final OutputStream os, final Element element, final
405425
}
406426
}
407427

428+
/**
429+
* Get the node values of direct children of the Element
430+
* that are Text nodes.
431+
*
432+
* @param element the element to get child text values from.
433+
*
434+
* @return the concatenated values of the direct child Text nodes, or null if there are no such children.
435+
*/
436+
private @Nullable String getChildTextValues(final Element element) {
437+
final NodeList children = element.getChildNodes();
438+
final int childLen = children.getLength();
439+
440+
StringBuilder builder = null;
441+
for (int i = 0; i < childLen; i++) {
442+
final Node child = children.item(i);
443+
if (child.getNodeType() == Node.TEXT_NODE) {
444+
if (builder == null) {
445+
builder = new StringBuilder();
446+
}
447+
builder.append(child.getNodeValue());
448+
}
449+
}
450+
451+
if (builder != null) {
452+
return builder.toString();
453+
}
454+
455+
return null;
456+
}
457+
408458
private void getDynamicSerializerOptions(final Serializer serializer) throws SAXException {
409459
final Option option = context.getOption(Option.SERIALIZE_QNAME);
410460
if (option != null) {
extensions/modules/compression/src/test/xquery/modules/compression/zip-unzip-tests.xqm

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
(:
2+
: Elemental
3+
: Copyright (C) 2024, Evolved Binary Ltd
4+
:
5+
6+
: https://www.evolvedbinary.com | https://www.elemental.xyz
7+
:
8+
: This library is free software; you can redistribute it and/or
9+
: modify it under the terms of the GNU Lesser General Public
10+
: License as published by the Free Software Foundation; version 2.1.
11+
:
12+
: This library is distributed in the hope that it will be useful,
13+
: but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15+
: Lesser General Public License for more details.
16+
:
17+
: You should have received a copy of the GNU Lesser General Public
18+
: License along with this library; if not, write to the Free Software
19+
: Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20+
:)
21+
xquery version "3.0";
22+
23+
module namespace zut = "http://exist-db.org/xquery/cache/test/zip-unzip";
24+
25+
import module namespace compression = "http://exist-db.org/xquery/compression";
26+
27+
declare namespace test = "http://exist-db.org/xquery/xqsuite";
28+
declare namespace util = "http://exist-db.org/xquery/util";
29+
declare namespace xmldb = "http://exist-db.org/xquery/xmldb";
30+
31+
declare variable $zut:TEST_COLLECTION_NAME := "zip-unzip-test";
32+
declare variable $zut:TEST_COLLECTION_PATH := "/db/" || $zut:TEST_COLLECTION_NAME;
33+
34+
declare variable $zut:DOC_WITH_PIS_1_NAME := "doc-with-pis-1.xml";
35+
declare variable $zut:DOC_WITH_PIS_1_PATH := $zut:TEST_COLLECTION_PATH || "/" || $zut:DOC_WITH_PIS_1_NAME;
36+
declare variable $zut:DOC_WITH_PIS_1 := document {
37+
<?xml-model href="http://docbook.org/xml/5.0/rng/docbook.rng" schematypens="http://relaxng.org/ns/structure/1.0"?>,
38+
<?xml-model href="http://docbook.org/xml/5.0/rng/docbook.rng" type="application/xml" schematypens="http://purl.oclc.org/dsdl/schematron"?>,
39+
<article xmlns="http://docbook.org/ns/docbook" xmlns:xlink="http://www.w3.org/1999/xlink" version="5.0">
40+
<info>
41+
<title>Integration Testing</title>
42+
<date>2Q19</date>
43+
<keywordset>
44+
<keyword>application-development</keyword>
45+
<keyword>testing</keyword>
46+
</keywordset>
47+
</info>
48+
</article>
49+
};
50+
51+
declare
52+
%test:setUp
53+
function zut:setup() {
54+
xmldb:create-collection("/db", $zut:TEST_COLLECTION_NAME),
55+
xmldb:store($zut:TEST_COLLECTION_PATH, $zut:DOC_WITH_PIS_1_NAME, $zut:DOC_WITH_PIS_1)
56+
};
57+
58+
declare
59+
%test:assertEquals(3)
60+
function zut:entry-of-doc-with-pis() {
61+
let $entries := <entry type="xml" name="{$zut:DOC_WITH_PIS_1_NAME}">{doc($zut:DOC_WITH_PIS_1_PATH)}</entry>
62+
return
63+
let $zip-data := compression:zip($entries, true())
64+
return
65+
let $output-collection-path := $zut:TEST_COLLECTION_PATH || "/entry-of-doc-with-pis-output"
66+
let $_ := compression:unzip($zip-data, compression:no-filter#2, compression:db-store-entry3($output-collection-path))
67+
return
68+
count(doc($output-collection-path || "/" || $zut:DOC_WITH_PIS_1_NAME)/node())
69+
};
70+
71+
declare
72+
%test:assertEquals(3)
73+
function zut:entry-of-doc-child-nodes-including-pis() {
74+
let $entries := <entry type="xml" name="{$zut:DOC_WITH_PIS_1_NAME}">{doc($zut:DOC_WITH_PIS_1_PATH)/node()}</entry>
75+
return
76+
let $zip-data := compression:zip($entries, true())
77+
return
78+
let $output-collection-path := $zut:TEST_COLLECTION_PATH || "/entry-of-doc-child-nodes-including-pis"
79+
let $_ := compression:unzip($zip-data, compression:no-filter#2, compression:db-store-entry3($output-collection-path))
80+
return
81+
count(doc($output-collection-path || "/" || $zut:DOC_WITH_PIS_1_NAME)/node())
82+
};
83+

0 commit comments

Comments
 (0)