Skip to content

Commit a3fd2cd

Browse files
authored
Merge pull request #394 from metafacture/379-improveHandlingOfXmlAttributes
Improve handling of XML attributes and element values.
2 parents: 8af6c69 + d6e68ff; commit: a3fd2cd

File tree

9 files changed: 327 additions (+327) and 17 deletions (-17).

metafacture-biblio/build.gradle

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,10 @@ dependencies {
3131
testImplementation 'junit:junit:4.12'
3232
testImplementation 'org.mockito:mockito-core:2.5.5'
3333
}
34+
35+
test {
36+
testLogging {
37+
showStandardStreams = true
38+
exceptionFormat = 'full'
39+
}
40+
}

metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlHandler.java

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ public final class MarcXmlHandler extends DefaultXmlPipe<StreamReceiver> {
4545
private static final String NAMESPACE = "http://www.loc.gov/MARC21/slim";
4646
private static final String LEADER = "leader";
4747
private static final String TYPE = "type";
48+
49+
private String attributeMarker = DEFAULT_ATTRIBUTE_MARKER;
4850
private String currentTag = "";
4951
private String namespace = NAMESPACE;
5052
private StringBuilder builder = new StringBuilder();
@@ -60,6 +62,14 @@ private boolean checkNamespace(final String uri) {
6062
return namespace == null || namespace.equals(uri);
6163
}
6264

65+
public void setAttributeMarker(final String attributeMarker) {
66+
this.attributeMarker = attributeMarker;
67+
}
68+
69+
public String getAttributeMarker() {
70+
return attributeMarker;
71+
}
72+
6373
@Override
6474
public void startElement(final String uri, final String localName, final String qName, final Attributes attributes) throws SAXException {
6575
if (SUBFIELD.equals(localName)) {
@@ -75,7 +85,7 @@ else if (CONTROLFIELD.equals(localName)) {
7585
}
7686
else if (RECORD.equals(localName) && checkNamespace(uri)) {
7787
getReceiver().startRecord("");
78-
getReceiver().literal(TYPE, attributes.getValue(TYPE));
88+
getReceiver().literal(attributeMarker + TYPE, attributes.getValue(TYPE));
7989
}
8090
else if (LEADER.equals(localName)) {
8191
builder = new StringBuilder();
@@ -87,18 +97,15 @@ else if (LEADER.equals(localName)) {
8797
public void endElement(final String uri, final String localName, final String qName) throws SAXException {
8898
if (SUBFIELD.equals(localName)) {
8999
getReceiver().literal(currentTag, builder.toString().trim());
90-
91100
}
92101
else if (DATAFIELD.equals(localName)) {
93102
getReceiver().endEntity();
94103
}
95104
else if (CONTROLFIELD.equals(localName)) {
96105
getReceiver().literal(currentTag, builder.toString().trim());
97-
98106
}
99107
else if (RECORD.equals(localName) && checkNamespace(uri)) {
100108
getReceiver().endRecord();
101-
102109
}
103110
else if (LEADER.equals(localName)) {
104111
getReceiver().literal(currentTag, builder.toString());

metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlHandlerTest.java

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@
2323
import org.junit.Before;
2424
import org.junit.Test;
2525
import org.metafacture.framework.StreamReceiver;
26+
import org.mockito.InOrder;
2627
import org.mockito.Mock;
28+
import org.mockito.Mockito;
2729
import org.mockito.MockitoAnnotations;
2830
import org.xml.sax.SAXException;
2931
import org.xml.sax.helpers.AttributesImpl;
@@ -130,4 +132,39 @@ public void issue330ShouldOptionallyRecognizeRecordsWithoutNamespace()
130132
verifyNoMoreInteractions(receiver);
131133
}
132134

135+
@Test
136+
public void shouldNotEncodeTypeAttributeAsMarkedLiteral() throws SAXException {
137+
final AttributesImpl attributes = new AttributesImpl();
138+
attributes.addAttribute(NAMESPACE, "type", "type", "CDATA", "bibliographic");
139+
140+
marcXmlHandler.startElement(NAMESPACE, RECORD, "", attributes);
141+
marcXmlHandler.endElement(NAMESPACE, RECORD, "");
142+
143+
final InOrder ordered = Mockito.inOrder(receiver);
144+
ordered.verify(receiver).startRecord("");
145+
ordered.verify(receiver).literal(TYPE, "bibliographic");
146+
ordered.verify(receiver).endRecord();
147+
ordered.verifyNoMoreInteractions();
148+
verifyNoMoreInteractions(receiver);
149+
}
150+
151+
@Test
152+
public void issue336_shouldEncodeTypeAttributeAsLiteralWithConfiguredMarker() throws SAXException {
153+
final String marker = "~";
154+
marcXmlHandler.setAttributeMarker(marker);
155+
156+
final AttributesImpl attributes = new AttributesImpl();
157+
attributes.addAttribute(NAMESPACE, "type", "type", "CDATA", "bibliographic");
158+
159+
marcXmlHandler.startElement(NAMESPACE, RECORD, "", attributes);
160+
marcXmlHandler.endElement(NAMESPACE, RECORD, "");
161+
162+
final InOrder ordered = Mockito.inOrder(receiver);
163+
ordered.verify(receiver).startRecord("");
164+
ordered.verify(receiver).literal(marker + TYPE, "bibliographic");
165+
ordered.verify(receiver).endRecord();
166+
ordered.verifyNoMoreInteractions();
167+
verifyNoMoreInteractions(receiver);
168+
}
169+
133170
}

metafacture-framework/src/main/java/org/metafacture/framework/helpers/DefaultXmlPipe.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,11 @@
3838
*/
3939
public class DefaultXmlPipe<R extends Receiver> extends DefaultSender<R> implements XmlPipe<R> {
4040

41+
public static final String DEFAULT_ATTRIBUTE_MARKER = "";
42+
public static final String DEFAULT_RECORD_TAG = "record";
43+
public static final String DEFAULT_ROOT_TAG = "records";
44+
public static final String DEFAULT_VALUE_TAG = "value";
45+
4146
public DefaultXmlPipe() {
4247
}
4348

metafacture-xml/build.gradle

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,10 @@ dependencies {
2626
testImplementation 'org.mockito:mockito-core:2.5.5'
2727
testRuntimeOnly 'org.slf4j:slf4j-simple:1.7.21'
2828
}
29+
30+
test {
31+
testLogging {
32+
showStandardStreams = true
33+
exceptionFormat = 'full'
34+
}
35+
}

metafacture-xml/src/main/java/org/metafacture/xml/GenericXmlHandler.java

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,13 @@
4040
@FluxCommand("handle-generic-xml")
4141
public final class GenericXmlHandler extends DefaultXmlPipe<StreamReceiver> {
4242

43-
public static final String DEFAULT_RECORD_TAG = "record";
44-
4543
public static final boolean EMIT_NAMESPACE = false;
4644

4745
private static final Pattern TABS = Pattern.compile("\t+");
4846

47+
private String attributeMarker = DEFAULT_ATTRIBUTE_MARKER;
4948
private String recordTagName = DEFAULT_RECORD_TAG;
49+
private String valueTagName = DEFAULT_VALUE_TAG;
5050

5151
private boolean inRecord;
5252
private StringBuilder valueBuffer = new StringBuilder();
@@ -92,6 +92,14 @@ public String getRecordTagName() {
9292
return recordTagName;
9393
}
9494

95+
public void setValueTagName(final String valueTagName) {
96+
this.valueTagName = valueTagName;
97+
}
98+
99+
public String getValueTagName() {
100+
return valueTagName;
101+
}
102+
95103
/**
96104
* Triggers namespace awareness. If set to "true" input data like "foo:bar"
97105
* will be passed through as "foo:bar". For backward compatibility the default
@@ -110,6 +118,14 @@ public boolean getEmitNamespace() {
110118
return this.emitNamespace;
111119
}
112120

121+
public void setAttributeMarker(final String attributeMarker) {
122+
this.attributeMarker = attributeMarker;
123+
}
124+
125+
public String getAttributeMarker() {
126+
return attributeMarker;
127+
}
128+
113129
@Override
114130
public void startElement(final String uri, final String localName, final String qName, final Attributes attributes) {
115131
if (inRecord) {
@@ -159,7 +175,7 @@ public void characters(final char[] chars, final int start, final int length) {
159175
private void writeValue() {
160176
final String value = valueBuffer.toString();
161177
if (!value.trim().isEmpty()) {
162-
getReceiver().literal("value", value.replace('\n', ' '));
178+
getReceiver().literal(valueTagName, value.replace('\n', ' '));
163179
}
164180
valueBuffer = new StringBuilder();
165181
}
@@ -170,7 +186,7 @@ private void writeAttributes(final Attributes attributes) {
170186
for (int i = 0; i < length; ++i) {
171187
final String name = emitNamespace ? attributes.getQName(i) : attributes.getLocalName(i);
172188
final String value = attributes.getValue(i);
173-
getReceiver().literal(name, value);
189+
getReceiver().literal(attributeMarker + name, value);
174190
}
175191
}
176192

metafacture-xml/src/main/java/org/metafacture/xml/SimpleXmlEncoder.java

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.metafacture.framework.annotations.In;
2727
import org.metafacture.framework.annotations.Out;
2828
import org.metafacture.framework.helpers.DefaultStreamPipe;
29+
import org.metafacture.framework.helpers.DefaultXmlPipe;
2930

3031
import java.io.IOException;
3132
import java.net.URL;
@@ -53,9 +54,6 @@ public final class SimpleXmlEncoder extends DefaultStreamPipe<ObjectReceiver<Str
5354

5455
public static final String ATTRIBUTE_MARKER = "~";
5556

56-
public static final String DEFAULT_ROOT_TAG = "records";
57-
public static final String DEFAULT_RECORD_TAG = "record";
58-
5957
private static final String NEW_LINE = "\n";
6058
private static final String INDENT = "\t";
6159

@@ -72,8 +70,10 @@ public final class SimpleXmlEncoder extends DefaultStreamPipe<ObjectReceiver<Str
7270

7371
private final StringBuilder builder = new StringBuilder();
7472

75-
private String rootTag = DEFAULT_ROOT_TAG;
76-
private String recordTag = DEFAULT_RECORD_TAG;
73+
private String attributeMarker = ATTRIBUTE_MARKER;
74+
private String rootTag = DefaultXmlPipe.DEFAULT_ROOT_TAG;
75+
private String recordTag = DefaultXmlPipe.DEFAULT_RECORD_TAG;
76+
private String valueTag = DefaultXmlPipe.DEFAULT_VALUE_TAG;
7777
private Map<String, String> namespaces = new HashMap<String, String>();
7878
private boolean writeRootTag = true;
7979
private boolean writeXmlHeader = true;
@@ -96,6 +96,14 @@ public void setRecordTag(final String tag) {
9696
recordTag = tag;
9797
}
9898

99+
public void setValueTag(final String valueTag) {
100+
this.valueTag = valueTag;
101+
}
102+
103+
public String getValueTag() {
104+
return valueTag;
105+
}
106+
99107
public void setNamespaceFile(final String file) {
100108
final Properties properties;
101109
try {
@@ -146,6 +154,14 @@ public void setNamespaces(final Map<String, String> namespaces) {
146154
this.namespaces = namespaces;
147155
}
148156

157+
public void setAttributeMarker(final String attributeMarker) {
158+
this.attributeMarker = attributeMarker;
159+
}
160+
161+
public String getAttributeMarker() {
162+
return attributeMarker;
163+
}
164+
149165
@Override
150166
public void startRecord(final String identifier) {
151167
if (separateRoots) {
@@ -192,11 +208,11 @@ public void endEntity() {
192208

193209
@Override
194210
public void literal(final String name, final String value) {
195-
if (name.isEmpty()) {
211+
if (name.equals(valueTag)) {
196212
element.setText(value);
197213
}
198-
else if (name.startsWith(ATTRIBUTE_MARKER)) {
199-
element.addAttribute(name.substring(1), value);
214+
else if (name.startsWith(attributeMarker)) {
215+
element.addAttribute(name.substring(attributeMarker.length()), value);
200216
}
201217
else {
202218
element.createChild(name).setText(value);

metafacture-xml/src/test/java/org/metafacture/xml/GenericXMLHandlerTest.java

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.metafacture.framework.StreamReceiver;
2626
import org.mockito.InOrder;
2727
import org.mockito.Mock;
28+
import org.mockito.Mockito;
2829
import org.mockito.MockitoAnnotations;
2930
import org.xml.sax.helpers.AttributesImpl;
3031

@@ -132,6 +133,28 @@ public void shouldEmitPCDataAsALiteralNamedValue() {
132133
ordered.verify(receiver).literal("value", "char-data");
133134
}
134135

136+
@Test
137+
public void shouldEmitPCDataAsALiteralWithConfiguredValueTagName() {
138+
final String name = "data";
139+
genericXmlHandler.setValueTagName(name);
140+
141+
final char[] charData = "char-data".toCharArray();
142+
genericXmlHandler.startElement("", "record", "record", attributes);
143+
genericXmlHandler.startElement("", "entity", "entity", attributes);
144+
genericXmlHandler.characters(charData, 0, charData.length);
145+
genericXmlHandler.endElement("", "entity", "entity");
146+
genericXmlHandler.endElement("", "record", "record");
147+
148+
final InOrder ordered = inOrder(receiver);
149+
ordered.verify(receiver).startRecord("");
150+
ordered.verify(receiver).startEntity("entity");
151+
ordered.verify(receiver).literal(name, "char-data");
152+
ordered.verify(receiver).endEntity();
153+
ordered.verify(receiver).endRecord();
154+
ordered.verifyNoMoreInteractions();
155+
Mockito.verifyNoMoreInteractions(receiver);
156+
}
157+
135158
@Test
136159
public void shouldEmitNamespaceOnEntityElementAndAttribute() {
137160
genericXmlHandler.setEmitNamespace(true);
@@ -141,6 +164,47 @@ public void shouldEmitNamespaceOnEntityElementAndAttribute() {
141164

142165
final InOrder ordered = inOrder(receiver);
143166
ordered.verify(receiver).startEntity("ns:entity");
144-
ordered.verify(receiver).literal("ns:attr","attr-value");
167+
ordered.verify(receiver).literal("ns:attr", "attr-value");
168+
}
169+
170+
@Test
171+
public void shouldNotEncodeAttributesAsMarkedLiterals() {
172+
attributes.addAttribute("", "attr", "attr", "CDATA", "attr-value");
173+
genericXmlHandler.startElement("", "record", "record", attributes);
174+
genericXmlHandler.endElement("", "record", "record");
175+
176+
final InOrder ordered = inOrder(receiver);
177+
ordered.verify(receiver).startRecord("");
178+
ordered.verify(receiver).literal("attr", "attr-value");
179+
ordered.verify(receiver).endRecord();
180+
ordered.verifyNoMoreInteractions();
181+
Mockito.verifyNoMoreInteractions(receiver);
182+
}
183+
184+
@Test
185+
public void issue379_shouldEncodeAttributesAsLiteralsWithConfiguredMarker() {
186+
final String marker = "~";
187+
genericXmlHandler.setAttributeMarker(marker);
188+
189+
genericXmlHandler.startElement("", "record", "record", attributes);
190+
attributes.addAttribute("", "authority", "authority", "CDATA", "marcrelator");
191+
attributes.addAttribute("", "type", "type", "CDATA", "text");
192+
genericXmlHandler.startElement("", "roleTerm", "roleTerm", attributes);
193+
final char[] charData = "Author".toCharArray();
194+
genericXmlHandler.characters(charData, 0, charData.length);
195+
genericXmlHandler.endElement("", "roleTerm", "roleTerm");
196+
genericXmlHandler.endElement("", "record", "record");
197+
198+
final InOrder ordered = inOrder(receiver);
199+
ordered.verify(receiver).startRecord("");
200+
ordered.verify(receiver).startEntity("roleTerm");
201+
ordered.verify(receiver).literal(marker + "authority", "marcrelator");
202+
ordered.verify(receiver).literal(marker + "type", "text");
203+
ordered.verify(receiver).literal("value", "Author");
204+
ordered.verify(receiver).endEntity();
205+
ordered.verify(receiver).endRecord();
206+
ordered.verifyNoMoreInteractions();
207+
Mockito.verifyNoMoreInteractions(receiver);
145208
}
209+
146210
}

0 commit comments

Comments
 (0)