Skip to content

Commit 82ced9d

Browse files
committed
[bugfix] Compressing persistent Documents or Elements that have non-element child nodes previously caused silent corruption of those documents in the generated archive. This is now fixed so that all nodes are correctly compressed.
1 parent fe71ef8 commit 82ced9d

File tree

3 files changed

+163
-29
lines changed

3 files changed

+163
-29
lines changed

extensions/modules/compression/pom.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@
176176
<exclude>pom.xml</exclude>
177177
<exclude>src/test/resources-filtered/conf.xml</exclude>
178178
<exclude>src/test/resources/log4j2.xml</exclude>
179+
<exclude>src/test/xquery/modules/compression/zip-unzip-tests.xqm</exclude>
179180
<exclude>src/main/java/org/exist/xquery/modules/compression/AbstractCompressFunction.java</exclude>
180181
<exclude>src/main/java/org/exist/xquery/modules/compression/EntryFunctions.java</exclude>
181182
</excludes>

extensions/modules/compression/src/main/java/org/exist/xquery/modules/compression/AbstractCompressFunction.java

Lines changed: 79 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
import org.exist.xquery.value.*;
6767
import org.w3c.dom.Element;
6868
import org.w3c.dom.Node;
69+
import org.w3c.dom.NodeList;
6970
import org.xml.sax.SAXException;
7071

7172
import javax.annotation.Nullable;
@@ -311,13 +312,7 @@ private void compressElement(final OutputStream os, final Element element, final
311312
throw new XPathException(this, "Item must be type of xs:anyURI or element entry.");
312313
}
313314

314-
if (element.getChildNodes().getLength() > 1) {
315-
throw new XPathException(this, "Entry content is not valid XML fragment.");
316-
}
317-
318315
String name = element.getAttribute("name");
319-
// if(name == null)
320-
// throw new XPathException(this, "Entry must have name attribute.");
321316

322317
final String type = element.getAttribute("type");
323318
ZipMethod method;
@@ -328,7 +323,7 @@ private void compressElement(final OutputStream os, final Element element, final
328323
}
329324

330325
if ("uri".equals(type)) {
331-
@Nullable final String uri = element.getFirstChild().getNodeValue();
326+
@Nullable final String uri = getChildTextValues(element);
332327
if (isNullOrEmpty(uri)) {
333328
throw new XPathException(this, "Entry with type uri must contain a URI.");
334329
}
@@ -351,32 +346,57 @@ private void compressElement(final OutputStream os, final Element element, final
351346
entry = newEntry(name);
352347

353348
if (!"collection".equals(type)) {
354-
final byte[] value;
355-
final Node content = element.getFirstChild();
349+
final NodeList children = element.getChildNodes();
356350

357-
if (content == null) {
351+
final byte[] value;
352+
if (children.getLength() == 0) {
358353
value = new byte[0];
359354
} else {
360-
if (content.getNodeType() == Node.TEXT_NODE) {
361-
String text = content.getNodeValue();
362-
if ("binary".equals(type)) {
363-
//base64 binary
364-
value = Base64.decodeBase64(text);
365-
} else {
366-
//text
367-
value = text.getBytes();
355+
@Nullable Serializer serializer = null;
356+
try (final UnsynchronizedByteArrayOutputStream baos = new UnsynchronizedByteArrayOutputStream()) {
357+
int elementCount = 0;
358+
for (int i = 0; i < children.getLength(); i++) {
359+
final Node child = children.item(i);
360+
361+
if (child.getNodeType() == Node.TEXT_NODE) {
362+
final String text = child.getNodeValue();
363+
if ("binary".equals(type)) {
364+
// Base64 encoded binary
365+
baos.write(Base64.decodeBase64(text));
366+
} else {
367+
// Text
368+
baos.write(text.getBytes());
369+
}
370+
} else {
371+
if (child.getNodeType() == Node.ELEMENT_NODE) {
372+
elementCount++;
373+
if (elementCount > 1) {
374+
throw new XPathException(this, "More than one Element is not permitted within an Entry.");
375+
}
376+
}
377+
378+
// XML
379+
if (serializer == null) {
380+
serializer = context.getBroker().borrowSerializer();
381+
serializer.setUser(context.getSubject());
382+
serializer.setProperty("omit-xml-declaration", "no");
383+
getDynamicSerializerOptions(serializer);
384+
} else {
385+
serializer.reset();
386+
serializer.setUser(context.getSubject());
387+
getDynamicSerializerOptions(serializer);
388+
serializer.setProperty("omit-xml-declaration", "yes");
389+
}
390+
391+
sbWriter.getBuilder().setLength(0);
392+
serializer.serialize((NodeValue) child, sbWriter);
393+
baos.write(sbWriter.toString().getBytes(StandardCharsets.UTF_8));
394+
}
368395
}
369-
} else {
370-
//xml
371-
final Serializer serializer = context.getBroker().borrowSerializer();
372-
try {
373-
serializer.setUser(context.getSubject());
374-
serializer.setProperty("omit-xml-declaration", "no");
375-
getDynamicSerializerOptions(serializer);
376-
sbWriter.getBuilder().setLength(0);
377-
serializer.serialize((NodeValue) content, sbWriter);
378-
value = sbWriter.toString().getBytes(StandardCharsets.UTF_8);
379-
} finally {
396+
397+
value = baos.toByteArray();
398+
} finally {
399+
if (serializer != null) {
380400
context.getBroker().returnSerializer(serializer);
381401
}
382402
}
@@ -406,6 +426,36 @@ private void compressElement(final OutputStream os, final Element element, final
406426
}
407427
}
408428

429+
/**
430+
* Get the node values of direct children of the Element
431+
* that are Text nodes.
432+
*
433+
* @param element the element to get child text values from.
434+
*
435+
* @return the value of the direct child Text nodes, or null if there are no such children.
436+
*/
437+
private @Nullable String getChildTextValues(final Element element) {
438+
final NodeList children = element.getChildNodes();
439+
final int childLen = children.getLength();
440+
441+
StringBuilder builder = null;
442+
for (int i = 0; i < childLen; i++) {
443+
final Node child = children.item(i);
444+
if (child.getNodeType() == Node.TEXT_NODE) {
445+
if (builder == null) {
446+
builder = new StringBuilder();
447+
}
448+
builder.append(child.getNodeValue());
449+
}
450+
}
451+
452+
if (builder != null) {
453+
return builder.toString();
454+
}
455+
456+
return null;
457+
}
458+
409459
private void getDynamicSerializerOptions(final Serializer serializer) throws SAXException {
410460
final Option option = context.getOption(Option.SERIALIZE_QNAME);
411461
if (option != null) {
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
(:
2+
: Elemental
3+
: Copyright (C) 2024, Evolved Binary Ltd
4+
:
5+
6+
: https://www.evolvedbinary.com | https://www.elemental.xyz
7+
:
8+
: This library is free software; you can redistribute it and/or
9+
: modify it under the terms of the GNU Lesser General Public
10+
: License as published by the Free Software Foundation; version 2.1.
11+
:
12+
: This library is distributed in the hope that it will be useful,
13+
: but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15+
: Lesser General Public License for more details.
16+
:
17+
: You should have received a copy of the GNU Lesser General Public
18+
: License along with this library; if not, write to the Free Software
19+
: Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20+
:)
21+
xquery version "3.0";
22+
23+
module namespace zut = "http://exist-db.org/xquery/cache/test/zip-unzip";
24+
25+
import module namespace compression = "http://exist-db.org/xquery/compression";
26+
27+
declare namespace test = "http://exist-db.org/xquery/xqsuite";
28+
declare namespace util = "http://exist-db.org/xquery/util";
29+
declare namespace xmldb = "http://exist-db.org/xquery/xmldb";
30+
31+
declare variable $zut:TEST_COLLECTION_NAME := "zip-unzip-test";
32+
declare variable $zut:TEST_COLLECTION_PATH := "/db/" || $zut:TEST_COLLECTION_NAME;
33+
34+
declare variable $zut:DOC_WITH_PIS_1_NAME := "doc-with-pis-1.xml";
35+
declare variable $zut:DOC_WITH_PIS_1_PATH := $zut:TEST_COLLECTION_PATH || "/" || $zut:DOC_WITH_PIS_1_NAME;
36+
declare variable $zut:DOC_WITH_PIS_1 := document {
37+
<?xml-model href="http://docbook.org/xml/5.0/rng/docbook.rng" schematypens="http://relaxng.org/ns/structure/1.0"?>,
38+
<?xml-model href="http://docbook.org/xml/5.0/rng/docbook.rng" type="application/xml" schematypens="http://purl.oclc.org/dsdl/schematron"?>,
39+
<article xmlns="http://docbook.org/ns/docbook" xmlns:xlink="http://www.w3.org/1999/xlink" version="5.0">
40+
<info>
41+
<title>Integration Testing</title>
42+
<date>2Q19</date>
43+
<keywordset>
44+
<keyword>application-development</keyword>
45+
<keyword>testing</keyword>
46+
</keywordset>
47+
</info>
48+
</article>
49+
};
50+
51+
(:~
 : Test fixture set-up: creates the test collection under /db and stores
 : the document containing processing instructions into it.
 :)
declare
    %test:setUp
function zut:setup() {
    (
        xmldb:create-collection("/db", $zut:TEST_COLLECTION_NAME),
        xmldb:store($zut:TEST_COLLECTION_PATH, $zut:DOC_WITH_PIS_1_NAME, $zut:DOC_WITH_PIS_1)
    )
};
57+
58+
(:~
 : Zips an entry whose content is the stored document node itself, unzips
 : the archive into an output collection, and counts the top-level nodes
 : of the restored document. The stored document has two processing
 : instructions and one element, so 3 is expected.
 :)
declare
    %test:assertEquals(3)
function zut:entry-of-doc-with-pis() {
    let $entries := <entry type="xml" name="{$zut:DOC_WITH_PIS_1_NAME}">{doc($zut:DOC_WITH_PIS_1_PATH)}</entry>
    let $zip-data := compression:zip($entries, true())
    let $output-collection-path := $zut:TEST_COLLECTION_PATH || "/entry-of-doc-with-pis-output"
    let $_ := compression:unzip($zip-data, compression:no-filter#2, compression:db-store-entry3($output-collection-path))
    return
        count(doc($output-collection-path || "/" || $zut:DOC_WITH_PIS_1_NAME)/node())
};
70+
71+
(:~
 : Zips an entry whose content is the child nodes of the stored document
 : (rather than the document node), unzips the archive into an output
 : collection, and counts the top-level nodes of the restored document.
 : The stored document has two processing instructions and one element,
 : so 3 is expected.
 :)
declare
    %test:assertEquals(3)
function zut:entry-of-doc-child-nodes-including-pis() {
    let $entries := <entry type="xml" name="{$zut:DOC_WITH_PIS_1_NAME}">{doc($zut:DOC_WITH_PIS_1_PATH)/node()}</entry>
    let $zip-data := compression:zip($entries, true())
    let $output-collection-path := $zut:TEST_COLLECTION_PATH || "/entry-of-doc-child-nodes-including-pis"
    let $_ := compression:unzip($zip-data, compression:no-filter#2, compression:db-store-entry3($output-collection-path))
    return
        count(doc($output-collection-path || "/" || $zut:DOC_WITH_PIS_1_NAME)/node())
};
83+

0 commit comments

Comments
 (0)