Skip to content

Commit c16833e

Browse files
committed
Added source file location annotations to Metamorph DOM.
When debugging Metamorph scripts it is helpful to be able to link the data, collector and function objects with their definitions in the Metamorph script. This commit annotates the XML DOM that is used to build the actual metamorph object structure with information about the location of each node in the source XML file. The location information is attached as user data to element nodes using the key `http://culturegraph.org/mf/user_data/location` (available via `Location.USER_DATA_ID`). It contains start and end positions of the element in the source file. Implementing this functionality required a complete rewrite of the `DOMLoader` because location information is only available from the SAX parser and not from the DOM builder. Therefore, the implementation uses a `Transformer` to construct a DOM from SAX events. The idea for this implementation comes from http://javacoalface.blogspot.de/2011/04/line-and-column-numbers-in-xml-dom.html. In the process of reimplementing the `DOMLoader` it was decided to move the class and its helper classes out of the morph package and into its own utility package due to the number of classes. Since we now have a `util.xml` package the old `XMLUtil` class was moved into this package as well. Note: The default SAX implementation used by OpenJDK 1.7.0.60 includes an old version of Xerces-J (2.7.1) which does not report location information in x-included files correctly. A bug report for updating the implementation exists (see https://bugs.openjdk.java.net/browse/JDK-8038043). However, until this makes it into OpenJDK a newer version of Xerces needs to be added manually to the classpath.
1 parent 3eca5ba commit c16833e

26 files changed

+1143
-162
lines changed

src/main/java/org/culturegraph/mf/morph/AbstractMetamorphDomWalker.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.culturegraph.mf.types.ScopedHashMap;
2929
import org.culturegraph.mf.util.ResourceUtil;
3030
import org.culturegraph.mf.util.StringUtil;
31+
import org.culturegraph.mf.util.xml.DomLoader;
3132
import org.w3c.dom.Document;
3233
import org.w3c.dom.Element;
3334
import org.w3c.dom.NamedNodeMap;

src/main/java/org/culturegraph/mf/morph/DomLoader.java

Lines changed: 0 additions & 152 deletions
This file was deleted.

src/main/java/org/culturegraph/mf/test/TestCase.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@
2626
import org.culturegraph.mf.stream.sink.EventList;
2727
import org.culturegraph.mf.stream.sink.StreamValidator;
2828
import org.culturegraph.mf.util.ResourceUtil;
29-
import org.culturegraph.mf.util.XMLUtil;
3029
import org.culturegraph.mf.util.reflection.ObjectFactory;
30+
import org.culturegraph.mf.util.xml.XmlUtil;
3131
import org.w3c.dom.Element;
3232
import org.w3c.dom.NodeList;
3333

@@ -166,8 +166,8 @@ private java.io.Reader getDataFromSource(final String src) {
166166
private java.io.Reader getDataEmbedded(final Element input) {
167167
final String inputType = input.getAttribute(TYPE_ATTR);
168168
if (input.hasChildNodes()) {
169-
if (XMLUtil.isXmlMimeType(inputType)) {
170-
return new StringReader(XMLUtil.nodeListToString(input.getChildNodes()));
169+
if (XmlUtil.isXmlMimeType(inputType)) {
170+
return new StringReader(XmlUtil.nodeListToString(input.getChildNodes()));
171171
}
172172
return new StringReader(input.getTextContent());
173173
}

src/main/java/org/culturegraph/mf/util/ResourceUtil.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333

3434
/**
3535
* @author Christoph Böhme <[email protected]>, Markus Michael Geipel
36-
*
36+
*
3737
*/
3838
public final class ResourceUtil {
3939

@@ -44,7 +44,7 @@ private ResourceUtil() {
4444
/**
4545
* First attempts to open the resource with name 'name'. On failure, attempts to
4646
* open file.
47-
*
47+
*
4848
* @param name
4949
* @return
5050
* @throws FileNotFoundException
@@ -89,10 +89,14 @@ public static Reader getReader(final File file, final String encoding) throws Fi
8989
return new InputStreamReader(getStream(file), encoding);
9090
}
9191

92+
public static URL getResourceUrl(final String name) {
93+
return Thread.currentThread().getContextClassLoader().getResource(name);
94+
}
95+
9296
public static Properties loadProperties(final String location) {
9397
try {
9498
return loadProperties(getStream(location));
95-
} catch (IOException e) {
99+
} catch (final IOException e) {
96100
throw new MetafactureException("'" + location + "' could not be loaded", e);
97101
}
98102
}
@@ -107,7 +111,7 @@ public static Properties loadProperties(final InputStream stream) throws IOExcep
107111
public static Properties loadProperties(final URL url) {
108112
try {
109113
return loadProperties(url.openStream());
110-
} catch (IOException e) {
114+
} catch (final IOException e) {
111115
throw new MetafactureException("'" + url.getPath() + "' could not be loaded", e);
112116
}
113117
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/*
2+
* Copyright 2014 Christoph Böhme
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.culturegraph.mf.util.xml;
17+
18+
import org.xml.sax.SAXException;
19+
import org.xml.sax.XMLReader;
20+
21+
/**
22+
* Filters out CDATA events.
23+
*
24+
* Note that the actual contents of the CDATA section are
25+
* not filtered. This filter removes only the events
26+
* marking the start and end of CDATA sections. Their
27+
* contents are still passed as {@code characters} events.
28+
* This filter only hides the fact that the character data
29+
* comes from a CDATA section.
30+
*
31+
* @author Christoph Böhme
32+
*
33+
*/
34+
public final class CDataFilter extends LexicalHandlerXmlFilter {
35+
36+
public CDataFilter(final XMLReader parent) {
37+
super(parent);
38+
}
39+
40+
@Override
41+
public void startCDATA() throws SAXException {
42+
// Do not forward CDATA section events
43+
}
44+
45+
@Override
46+
public void endCDATA() throws SAXException {
47+
// Do not forward CDATA section events
48+
}
49+
50+
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/*
2+
* Copyright 2014 Christoph Böhme
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.culturegraph.mf.util.xml;
17+
18+
import org.xml.sax.SAXException;
19+
import org.xml.sax.XMLReader;
20+
21+
/**
22+
* Filters out {@code comment} SAX events.
23+
*
24+
* @author Christoph Böhme
25+
*
26+
*/
27+
public final class CommentsFilter extends LexicalHandlerXmlFilter {
28+
29+
public CommentsFilter(final XMLReader parent) {
30+
super(parent);
31+
}
32+
33+
@Override
34+
public void comment(final char[] ch, final int start, final int length) throws SAXException {
35+
// Do not forward comment events
36+
}
37+
38+
}

0 commit comments

Comments
 (0)