Skip to content

Commit a00190e

Browse files
authored
Merge pull request #4554 from evolvedbinary/feature/new-xmldiff-impl
New implementation of the XmlDiffModule
2 parents c17fb22 + e86b86f commit a00190e

File tree

8 files changed

+2129
-143
lines changed

8 files changed

+2129
-143
lines changed

extensions/modules/xmldiff/pom.xml

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,76 @@
6262
<artifactId>xml-apis</artifactId>
6363
</dependency>
6464

65+
<dependency>
66+
<groupId>com.google.code.findbugs</groupId>
67+
<artifactId>jsr305</artifactId>
68+
</dependency>
69+
70+
<dependency>
71+
<groupId>io.lacuna</groupId>
72+
<artifactId>bifurcan</artifactId>
73+
</dependency>
74+
75+
<dependency>
76+
<groupId>junit</groupId>
77+
<artifactId>junit</artifactId>
78+
<scope>test</scope>
79+
</dependency>
80+
6581
</dependencies>
6682

83+
<build>
84+
<testResources>
85+
<testResource>
86+
<directory>src/test/resources</directory>
87+
<filtering>false</filtering>
88+
</testResource>
89+
<testResource>
90+
<directory>src/test/resources-filtered</directory>
91+
<filtering>true</filtering>
92+
</testResource>
93+
</testResources>
94+
95+
<plugins>
96+
<plugin>
97+
<groupId>com.mycila</groupId>
98+
<artifactId>license-maven-plugin</artifactId>
99+
<configuration>
100+
<licenseSets>
101+
102+
<licenseSet>
103+
<!--
104+
eXist-db's License
105+
-->
106+
<header>${project.parent.relativePath}/LGPL-21-license.template.txt</header>
107+
<excludes>
108+
<exclude>src/main/java/org/exist/xquery/modules/xmldiff/Compare.java</exclude>
109+
<exclude>src/main/java/org/exist/xquery/modules/xmldiff/XmlDiffModule.java</exclude>
110+
<exclude>src/test/java/xquery/modules/xmldiff/XmlDiffTests.java</exclude>
111+
<exclude>src/test/xquery/modules/xmldiff/compare.xqm</exclude>
112+
<exclude>src/test/xquery/modules/xmldiff/diff.xqm</exclude>
113+
</excludes>
114+
</licenseSet>
115+
116+
<licenseSet>
117+
<!--
118+
FDB backport to LGPL 2.1-only licensed code
119+
-->
120+
<header>${project.parent.relativePath}/FDB-backport-LGPL-21-ONLY-license.template.txt</header>
121+
<includes>
122+
<include>src/main/java/org/exist/xquery/modules/xmldiff/Compare.java</include>
123+
<include>src/main/java/org/exist/xquery/modules/xmldiff/XmlDiffModule.java</include>
124+
<include>src/test/java/xquery/modules/xmldiff/XmlDiffTests.java</include>
125+
<include>src/test/xquery/modules/xmldiff/compare.xqm</include>
126+
<include>src/test/xquery/modules/xmldiff/diff.xqm</include>
127+
</includes>
128+
129+
</licenseSet>
130+
131+
</licenseSets>
132+
</configuration>
133+
</plugin>
134+
</plugins>
135+
</build>
136+
67137
</project>
Lines changed: 140 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,25 @@
11
/*
2-
* eXist-db Open Source Native XML Database
3-
* Copyright (C) 2001 The eXist-db Authors
2+
* Copyright (C) 2014, Evolved Binary Ltd
43
*
5-
6-
* http://www.exist-db.org
4+
* This file was originally ported from FusionDB to eXist-db by
5+
* Evolved Binary, for the benefit of the eXist-db Open Source community.
6+
* Only the ported code as it appears in this file, at the time that
7+
* it was contributed to eXist-db, was re-licensed under The GNU
8+
* Lesser General Public License v2.1 only for use in eXist-db.
9+
*
10+
* This license grant applies only to a snapshot of the code as it
11+
* appeared when ported, it does not offer or infer any rights to either
12+
* updates of this source code or access to the original source code.
13+
*
14+
* The GNU Lesser General Public License v2.1 only license follows.
15+
*
16+
* ---------------------------------------------------------------------
17+
*
18+
* Copyright (C) 2014, Evolved Binary Ltd
719
*
820
* This library is free software; you can redistribute it and/or
921
* modify it under the terms of the GNU Lesser General Public
10-
* License as published by the Free Software Foundation; either
11-
* version 2.1 of the License, or (at your option) any later version.
22+
* License as published by the Free Software Foundation; version 2.1.
1223
*
1324
* This library is distributed in the hope that it will be useful,
1425
* but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -21,125 +32,147 @@
2132
*/
2233
package org.exist.xquery.modules.xmldiff;
2334

35+
import io.lacuna.bifurcan.IMap;
36+
import org.exist.dom.persistent.NodeProxy;
37+
import org.exist.xquery.functions.map.MapType;
38+
import org.w3c.dom.Node;
2439
import org.xmlunit.builder.DiffBuilder;
2540
import org.xmlunit.builder.Input;
2641
import org.xmlunit.diff.Diff;
2742

28-
import org.exist.dom.QName;
29-
import org.exist.storage.serializers.Serializer;
3043
import org.exist.xquery.*;
3144
import org.exist.xquery.value.*;
32-
import org.xml.sax.SAXException;
3345

34-
import javax.xml.transform.OutputKeys;
46+
import javax.annotation.Nullable;
3547
import javax.xml.transform.Source;
36-
import java.util.Properties;
48+
49+
import static org.exist.xquery.FunctionDSL.*;
50+
import static org.exist.xquery.modules.xmldiff.XmlDiffModule.functionSignature;
3751

3852
/**
39-
* @author <a href="mailto:[email protected]">Pierrick Brihaye</a>
53+
* Module for comparing XML documents and nodes.
54+
*
55+
* @author <a href="mailto:[email protected]">Adam Retter</a>
4056
*/
41-
public class Compare extends Function {
42-
private final static Properties OUTPUT_PROPERTIES = new Properties();
57+
public class Compare extends BasicFunction {
58+
59+
// Keys of the entries placed in the map results built by trueMapResult() and falseMapResult().
private static final StringValue EQUIVALENT_MAP_KEY = new StringValue("equivalent");
60+
private static final StringValue POSITION_MAP_KEY = new StringValue("position");
61+
private static final StringValue MESSAGE_MAP_KEY = new StringValue("message");
62+
63+
// Parameter declarations shared by the FS_COMPARE and FS_DIFF signatures.
private static final FunctionParameterSequenceType FS_PARAM_NODE_SET_1 = optManyParam("node-set-1", Type.NODE, "The first node set.");
64+
private static final FunctionParameterSequenceType FS_PARAM_NODE_SET_2 = optManyParam("node-set-2", Type.NODE, "The second node set.");
65+
66+
// Function names; eval() tests isCalledAs(FNS_COMPARE) to choose between a boolean and a map result.
private static final String FNS_COMPARE = "compare";
67+
private static final String FNS_DIFF = "diff";
4368

44-
static {
45-
OUTPUT_PROPERTIES.setProperty(OutputKeys.INDENT, "no");
46-
OUTPUT_PROPERTIES.setProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
69+
public static final FunctionSignature FS_COMPARE = functionSignature(
70+
FNS_COMPARE,
71+
"Compares two nodes sets to determine their equivalence." +
72+
"Equivalence is determined in 3 stages, first by sequence length, then equivalent Node types, and finally by XMLUnit Diff.",
73+
returns(Type.BOOLEAN, "Returns true if the node sets $node-set-1 and $node-set-2 are equal, false otherwise. " +
74+
"This function is a simplified version of: " + XmlDiffModule.PREFIX + ":" + FNS_DIFF + "#2 that only returns true or false."),
75+
FS_PARAM_NODE_SET_1,
76+
FS_PARAM_NODE_SET_2
77+
);
78+
79+
public static final FunctionSignature FS_DIFF = functionSignature(
80+
FNS_DIFF,
81+
"Reports on the differences between two nodes sets to determine their equality." +
82+
"Equality is determined in 3 stages, first by sequence length, then equivalent Node types, and finally by XMLUnit Diff for Document and Element nodes, or fn:deep-equals for all other node types.",
83+
returns(Type.MAP, "Returns a map(xs:string, xs:anyAtomicType). When the node sets are equivalent the map is: map {'equivalent': fn:true() }. When the nodesets are not equivalent, the map is structured like: map {'equivalent': fn:false(), 'position': xs:integer, 'message': xs:string}."),
84+
FS_PARAM_NODE_SET_1,
85+
FS_PARAM_NODE_SET_2
86+
);
87+
88+
/**
 * Creates the function for the given signature.
 *
 * @param context the XQuery context, passed straight to the superclass
 * @param signature the signature this instance is bound to (presumably
 *     {@link #FS_COMPARE} or {@link #FS_DIFF} - confirm against the module's
 *     function registration)
 */
public Compare(final XQueryContext context, final FunctionSignature signature) {
89+
super(context, signature);
4790
}
48-
49-
public final static FunctionSignature signature = new FunctionSignature(
50-
new QName("compare", XmlDiffModule.NAMESPACE_URI,
51-
XmlDiffModule.PREFIX),
52-
"Returns true() if the two node sets $node-set-1 and $node-set-2 are equal, otherwise false()",
53-
new SequenceType[] {
54-
new FunctionParameterSequenceType("node-set-1", Type.NODE, Cardinality.ZERO_OR_MORE, "the first node set"),
55-
new FunctionParameterSequenceType("node-set-2", Type.NODE, Cardinality.ZERO_OR_MORE, "the second node set") },
56-
new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.ZERO_OR_ONE, "true() if the two node sets $node-set-1 and $node-set-2 are equal, otherwise false()"));
57-
58-
public Compare(XQueryContext context) {
59-
super(context, signature);
60-
}
61-
62-
/*
63-
* (non-Javadoc)
64-
*
65-
* @see org.exist.xquery.BasicFunction#eval(org.exist.xquery.value.Sequence[],
66-
* org.exist.xquery.value.Sequence)
67-
*/
68-
public Sequence eval(Sequence contextSequence, Item contextItem)
69-
throws XPathException {
70-
71-
if (context.getProfiler().isEnabled()) {
72-
context.getProfiler().start(this);
73-
context.getProfiler().message(this, Profiler.DEPENDENCIES,
74-
"DEPENDENCIES",
75-
Dependency.getDependenciesName(this.getDependencies()));
76-
if (contextSequence != null)
77-
context.getProfiler().message(this, Profiler.START_SEQUENCES,
78-
"CONTEXT SEQUENCE", contextSequence);
79-
if (contextItem != null)
80-
context.getProfiler().message(this, Profiler.START_SEQUENCES,
81-
"CONTEXT ITEM", contextItem.toSequence());
82-
}
83-
84-
Expression arg1 = getArgument(0);
85-
Sequence s1 = arg1.eval(contextSequence, contextItem);
86-
87-
Expression arg2 = getArgument(1);
88-
context.pushDocumentContext();
89-
Sequence s2 = arg2.eval(contextSequence, contextItem);
90-
context.popDocumentContext();
91-
92-
if (s1.isEmpty()) {
93-
return BooleanValue.valueOf(s2.isEmpty());
94-
}
95-
else if (s2.isEmpty()) {
96-
return BooleanValue.valueOf(s1.isEmpty());
97-
}
98-
99-
Sequence result = null;
100-
StringBuilder v1 = new StringBuilder();
101-
StringBuilder v2 = new StringBuilder();
102-
try {
103-
if (s1.hasMany()){
104-
for (int i = 0; i < s1.getItemCount(); i++) {
105-
v1.append(serialize((NodeValue) s1.itemAt(i)));
106-
}
91+
92+
@Override
93+
public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
94+
final Sequence nodeSet1 = args[0];
95+
final Sequence nodeSet2 = args[1];
96+
97+
final int itemCount1 = nodeSet1.getItemCount();
98+
final int itemCount2 = nodeSet2.getItemCount();
99+
100+
// first determination - are the sequences of the same length?
101+
if (itemCount1 != itemCount2) {
102+
if (isCalledAs(FNS_COMPARE)) {
103+
return BooleanValue.FALSE;
107104
} else {
108-
v1.append(serialize((NodeValue) s1.itemAt(0)));
105+
return falseMapResult(Math.min(itemCount1, itemCount2), "Sequences are of different lengths: fn:length($node-set-1) eq " + itemCount1 + ", fn:length($node-set-2) eq " + itemCount2 + ".");
109106
}
110-
if (s2.hasMany()) {
111-
for (int i = 0; i < s2.getItemCount(); i++) {
112-
v2.append(serialize((NodeValue) s2.itemAt(i)));
107+
}
108+
109+
// second determination - do the sequences contain the same types?
110+
for (int i = 0; i < itemCount1; i++) {
111+
final Item item1 = nodeSet1.itemAt(i);
112+
final Item item2 = nodeSet2.itemAt(i);
113+
114+
if (item1.getType() != item2.getType()) {
115+
if (isCalledAs(FNS_COMPARE)) {
116+
return BooleanValue.FALSE;
117+
} else {
118+
return falseMapResult(i + 1, "Items are of different types: $node-set-1[" + i + "] as " + Type.getTypeName(item1.getType()) + ", $node-set-2[" + i + "] as " + Type.getTypeName(item2.getType()) + ".");
119+
}
120+
}
121+
}
122+
123+
// third determination - does XMLUnit consider each node in the sequences to be equal
124+
for (int i = 0; i < itemCount1; i++) {
125+
final Node node1 = toNode(nodeSet1.itemAt(i));
126+
final Node node2 = toNode(nodeSet2.itemAt(i));
127+
128+
if (node1 == null || node2 == null) {
129+
throw new XPathException(this, XmlDiffModule.UNSUPPORTED_DOM_IMPLEMENTATION, "Unable to determine DOM implementation of node set item");
130+
}
131+
132+
final Source expected = Input.fromNode(node1).build();
133+
final Source actual = Input.fromNode(node2).build();
134+
135+
final Diff diff = DiffBuilder.compare(expected).withTest(actual)
136+
.checkForIdentical()
137+
.build();
138+
139+
if (diff.hasDifferences()) {
140+
if (isCalledAs(FNS_COMPARE)) {
141+
return BooleanValue.FALSE;
142+
} else {
143+
return falseMapResult(i + 1, diff.toString());
113144
}
114-
} else {
115-
v2.append(serialize((NodeValue) s2.itemAt(0)));
116145
}
146+
}
147+
148+
if (isCalledAs(FNS_COMPARE)) {
149+
return BooleanValue.TRUE;
150+
} else {
151+
return trueMapResult();
152+
}
153+
}
154+
155+
private MapType trueMapResult() {
156+
return new MapType(getContext(), getContext().getDefaultCollator(), EQUIVALENT_MAP_KEY, BooleanValue.TRUE);
157+
}
117158

118-
final Source expected = Input.fromString(v1.toString()).build();
119-
final Source actual = Input.fromString(v2.toString()).build();
120-
final Diff diff = DiffBuilder.compare(expected).withTest(actual)
121-
.checkForIdentical()
122-
.build();
123-
boolean identical = !diff.hasDifferences();
124-
result = new BooleanValue(this, identical);
125-
} catch (Exception e) {
126-
throw new XPathException(this, "An exception occurred while serializing node " +
127-
"for comparison: " + e.getMessage(), e);
128-
}
129-
130-
if (context.getProfiler().isEnabled())
131-
context.getProfiler().end(this, "", result);
132-
133-
return result;
159+
private MapType falseMapResult(final int sequencePosition, final String message) {
160+
final IMap<AtomicValue, Sequence> linearMap = MapType.newLinearMap(getContext().getDefaultCollator());
161+
linearMap.put(EQUIVALENT_MAP_KEY, BooleanValue.FALSE);
162+
linearMap.put(POSITION_MAP_KEY, new IntegerValue(sequencePosition));
163+
linearMap.put(MESSAGE_MAP_KEY, new StringValue(message.trim()));
164+
return new MapType(getContext(), linearMap.forked(), Type.STRING);
134165
}
135-
136-
private String serialize(NodeValue node) throws SAXException {
137-
final Serializer serializer = context.getBroker().borrowSerializer();
138-
try {
139-
serializer.setProperties(OUTPUT_PROPERTIES);
140-
return serializer.serialize(node);
141-
} finally {
142-
context.getBroker().returnSerializer(serializer);
143-
}
166+
167+
private static @Nullable Node toNode(final Item item) {
168+
if (item instanceof Node) {
169+
return (Node) item;
170+
}
171+
172+
if (item instanceof NodeProxy) {
173+
return ((NodeProxy) item).getNode();
174+
}
175+
176+
return null;
144177
}
145178
}

0 commit comments

Comments
 (0)