Skip to content

Commit 78aaca3

Browse files
committed
Add MarcXmlEncoder
- add tests. This is a copy of the files of the closed but unmerged PR #297. See #300.
1 parent b2400c2 commit 78aaca3

File tree

3 files changed

+385
-0
lines changed

3 files changed

+385
-0
lines changed
Lines changed: 270 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,270 @@
1+
package org.metafacture.biblio.marc21;
2+
3+
import java.util.Collections;
4+
5+
import org.metafacture.framework.FluxCommand;
6+
import org.metafacture.framework.MetafactureException;
7+
import org.metafacture.framework.ObjectReceiver;
8+
import org.metafacture.framework.StreamReceiver;
9+
import org.metafacture.framework.annotations.Description;
10+
import org.metafacture.framework.annotations.In;
11+
import org.metafacture.framework.annotations.Out;
12+
import org.metafacture.framework.helpers.DefaultStreamPipe;
13+
14+
/**
15+
* Encodes a stream into MARCXML.
16+
*
17+
* @author some Jan (Eberhardt) did almost all
18+
* @author Pascal Christoph (dr0i) dug it up again
19+
*/
20+
21+
@Description("Encodes a stream into MARCXML.")
22+
@In(StreamReceiver.class)
23+
@Out(String.class)
24+
@FluxCommand("encode-marc21")
25+
public final class MarcXmlEncoder extends DefaultStreamPipe<ObjectReceiver<String>> {
26+
private static final String ROOT_OPEN = "<marc:collection xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\">";
27+
private static final String ROOT_CLOSE = "</marc:collection>";
28+
29+
private static final String RECORD_OPEN = "<marc:record>";
30+
private static final String RECORD_CLOSE = "</marc:record>";
31+
32+
private static final String CONTROLFIELD_OPEN_TEMPLATE = "<marc:controlfield tag=\"%s\">";
33+
private static final String CONTROLFIELD_CLOSE = "</marc:controlfield>";
34+
35+
private static final String DATAFIELD_OPEN_TEMPLATE = "<marc:datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\">";
36+
private static final String DATAFIELD_CLOSE = "</marc:datafield>";
37+
38+
private static final String SUBFIELD_OPEN_TEMPLATE = "<marc:subfield code=\"%s\">";
39+
private static final String SUBFIELD_CLOSE = "</marc:subfield>";
40+
41+
private static final String NEW_LINE = "\n";
42+
private static final String INDENT = "\t";
43+
44+
private static final String XML_DECLARATION_TEMPLATE = "<?xml version=\"%s\" encoding=\"%s\"?>";
45+
46+
private final StringBuilder builder;
47+
48+
private boolean atStreamStart;
49+
50+
private boolean omitXmlDeclaration;
51+
private String xmlVersion;
52+
private String xmlEncoding;
53+
54+
private String currentEntity;
55+
private int indentationLevel;
56+
private boolean formatted;
57+
58+
public MarcXmlEncoder() {
59+
this.builder = new StringBuilder();
60+
this.atStreamStart = true;
61+
62+
this.omitXmlDeclaration = false;
63+
this.xmlVersion = "1.0";
64+
this.xmlEncoding = "UTF-8";
65+
66+
this.currentEntity = "";
67+
68+
this.indentationLevel = 0;
69+
this.formatted = true;
70+
}
71+
72+
public void omitXmlDeclaration(boolean omitXmlDeclaration) {
73+
this.omitXmlDeclaration = omitXmlDeclaration;
74+
}
75+
76+
public void setXmlVersion(String xmlVersion) {
77+
this.xmlVersion = xmlVersion;
78+
}
79+
80+
public void setXmlEncoding(String xmlEncoding) {
81+
this.xmlEncoding = xmlEncoding;
82+
}
83+
84+
/**
85+
* Formats the resulting xml, by indentation.
86+
*
87+
* @param formatted
88+
* True, if formatting is activated.
89+
*/
90+
public void setFormatted(boolean formatted) {
91+
this.formatted = formatted;
92+
}
93+
94+
@Override
95+
public void startRecord(final String identifier) {
96+
if (atStreamStart) {
97+
if (!omitXmlDeclaration) {
98+
writeHeader();
99+
prettyPrintNewLine();
100+
}
101+
writeRaw(ROOT_OPEN);
102+
prettyPrintNewLine();
103+
incrementIndentationLevel();
104+
}
105+
atStreamStart = false;
106+
107+
prettyPrintIndentation();
108+
writeRaw(RECORD_OPEN);
109+
prettyPrintNewLine();
110+
111+
incrementIndentationLevel();
112+
}
113+
114+
@Override
115+
public void endRecord() {
116+
decrementIndentationLevel();
117+
prettyPrintIndentation();
118+
writeRaw(RECORD_CLOSE);
119+
prettyPrintNewLine();
120+
sendAndClearData();
121+
}
122+
123+
@Override
124+
public void startEntity(final String name) {
125+
currentEntity = name;
126+
if (!name.equals("leader")) {
127+
if (name.length() != 5) {
128+
String message = String.format("Entity too short." + "Got a string ('%s') of length %d."
129+
+ "Expected a length of 5 (field + indicators).", name, name.length());
130+
throw new MetafactureException(message);
131+
}
132+
133+
String tag = name.substring(0, 3);
134+
String ind1 = name.substring(3, 4);
135+
String ind2 = name.substring(4, 5);
136+
prettyPrintIndentation();
137+
writeRaw(String.format(DATAFIELD_OPEN_TEMPLATE, tag, ind1, ind2));
138+
prettyPrintNewLine();
139+
incrementIndentationLevel();
140+
}
141+
}
142+
143+
@Override
144+
public void endEntity() {
145+
if (!currentEntity.equals("leader")) {
146+
decrementIndentationLevel();
147+
prettyPrintIndentation();
148+
writeRaw(DATAFIELD_CLOSE);
149+
prettyPrintNewLine();
150+
}
151+
currentEntity = "";
152+
}
153+
154+
@Override
155+
public void literal(final String name, final String value)
156+
{
157+
if (currentEntity.equals(""))
158+
{
159+
prettyPrintIndentation();
160+
writeRaw(String.format(CONTROLFIELD_OPEN_TEMPLATE, name));
161+
writeEscaped(value.trim());
162+
writeRaw(CONTROLFIELD_CLOSE);
163+
prettyPrintNewLine();
164+
}
165+
else if (!currentEntity.equals("leader"))
166+
{
167+
prettyPrintIndentation();
168+
writeRaw(String.format(SUBFIELD_OPEN_TEMPLATE, name));
169+
writeEscaped(value.trim());
170+
writeRaw(SUBFIELD_CLOSE);
171+
prettyPrintNewLine();
172+
}
173+
else {
174+
}
175+
176+
@Override
177+
protected void onResetStream() {
178+
if (!atStreamStart) {
179+
writeFooter();
180+
}
181+
sendAndClearData();
182+
atStreamStart = true;
183+
}
184+
185+
@Override
186+
protected void onCloseStream() {
187+
writeFooter();
188+
sendAndClearData();
189+
}
190+
191+
/** Increments the indentation level by one */
192+
private void incrementIndentationLevel() {
193+
indentationLevel += 1;
194+
}
195+
196+
/** Decrements the indentation level by one */
197+
private void decrementIndentationLevel() {
198+
indentationLevel -= 1;
199+
}
200+
201+
/** Adds a XML Header */
202+
private void writeHeader() {
203+
writeRaw(String.format(XML_DECLARATION_TEMPLATE, xmlVersion, xmlEncoding));
204+
}
205+
206+
/** Closes the root tag */
207+
private void writeFooter() {
208+
writeRaw(ROOT_CLOSE);
209+
}
210+
211+
/** Writes a unescaped sequence */
212+
private void writeRaw(final String str) {
213+
builder.append(str);
214+
}
215+
216+
/** Writes a escaped sequence */
217+
private void writeEscaped(final String str) {
218+
final int len = str.length();
219+
for (int i = 0; i < len; ++i) {
220+
final char c = str.charAt(i);
221+
final String entityName;
222+
switch (c) {
223+
case '&':
224+
entityName = "amp";
225+
break;
226+
case '<':
227+
entityName = "lt";
228+
break;
229+
case '>':
230+
entityName = "gt";
231+
break;
232+
case '\'':
233+
entityName = "apos";
234+
break;
235+
case '"':
236+
entityName = "quot";
237+
break;
238+
default:
239+
entityName = null;
240+
break;
241+
}
242+
243+
if (entityName == null) {
244+
builder.append(c);
245+
} else {
246+
builder.append('&');
247+
builder.append(entityName);
248+
builder.append(';');
249+
}
250+
}
251+
}
252+
253+
private void prettyPrintIndentation() {
254+
if (formatted) {
255+
String prefix = String.join("", Collections.nCopies(indentationLevel, INDENT));
256+
builder.append(prefix);
257+
}
258+
}
259+
260+
private void prettyPrintNewLine() {
261+
if (formatted) {
262+
builder.append(NEW_LINE);
263+
}
264+
}
265+
266+
private void sendAndClearData() {
267+
getReceiver().process(builder.toString());
268+
builder.delete(0, builder.length());
269+
}
270+
}

metafacture-biblio/src/main/resources/flux-commands.properties

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
decode-marc21 org.metafacture.biblio.marc21.Marc21Decoder
1717
encode-marc21 org.metafacture.biblio.marc21.Marc21Encoder
1818
handle-marcxml org.metafacture.biblio.marc21.MarcXmlHandler
19+
encode-marcxml org.metafacture.biblio.marc21.MarcXmlEncoder
1920

2021
decode-pica org.metafacture.biblio.pica.PicaDecoder
2122
encode-pica org.metafacture.biblio.pica.PicaEncoder
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
package org.metafacture.biblio.marc21;
2+
3+
import static org.junit.Assert.assertEquals;
4+
import static org.junit.Assert.assertTrue;
5+
6+
import org.junit.After;
7+
import org.junit.Before;
8+
import org.junit.Test;
9+
import org.metafacture.framework.helpers.DefaultObjectReceiver;
10+
11+
public class MarcXmlEncoderTest {
12+
13+
private StringBuilder resultCollector;
14+
private MarcXmlEncoder encoder;
15+
16+
@Before
17+
public void setUp() throws Exception {
18+
encoder = new MarcXmlEncoder();
19+
encoder.setFormatted(false);
20+
encoder.setReceiver(new DefaultObjectReceiver<String>() {
21+
@Override
22+
public void process(final String obj) {
23+
resultCollector.append(obj);
24+
}
25+
});
26+
resultCollector = new StringBuilder();
27+
}
28+
29+
@After
30+
public void tearDown() throws Exception {
31+
}
32+
33+
private void addOneRecord(MarcXmlEncoder encoder) {
34+
encoder.startRecord("92005291");
35+
encoder.literal("001", "92005291");
36+
encoder.startEntity("010 ");
37+
encoder.literal("a", "92005291");
38+
encoder.endEntity();
39+
encoder.endRecord();
40+
}
41+
42+
@Test
43+
public void doNotOmitXmlDeclaration() throws Exception {
44+
encoder.omitXmlDeclaration(false);
45+
addOneRecord(encoder);
46+
encoder.closeStream();
47+
48+
String actual = resultCollector.toString();
49+
assertTrue(actual.startsWith("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"));
50+
}
51+
52+
@Test
53+
public void omitXmlDeclaration() throws Exception {
54+
encoder.omitXmlDeclaration(true);
55+
addOneRecord(encoder);
56+
encoder.closeStream();
57+
String actual = resultCollector.toString();
58+
assertTrue(actual.startsWith("<marc:collection"));
59+
assertTrue(actual.endsWith("</marc:collection>"));
60+
}
61+
62+
@Test
63+
public void setXmlVersion() throws Exception {
64+
encoder.omitXmlDeclaration(false);
65+
encoder.setXmlVersion("1.1");
66+
addOneRecord(encoder);
67+
encoder.closeStream();
68+
69+
String actual = resultCollector.toString();
70+
assertTrue(actual.startsWith("<?xml version=\"1.1\" encoding=\"UTF-8\"?>"));
71+
}
72+
73+
@Test
74+
public void setXmlEncoding() throws Exception {
75+
encoder.omitXmlDeclaration(false);
76+
encoder.setXmlEncoding("UTF-16");
77+
addOneRecord(encoder);
78+
encoder.closeStream();
79+
80+
String actual = resultCollector.toString();
81+
assertTrue(actual.startsWith("<?xml version=\"1.0\" encoding=\"UTF-16\"?>"));
82+
}
83+
84+
@Test
85+
public void createAnEmptyRecord() throws Exception {
86+
encoder.startRecord("1");
87+
encoder.endRecord();
88+
encoder.closeStream();
89+
String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><marc:collection xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\"><marc:record></marc:record></marc:collection>";
90+
String actual = resultCollector.toString();
91+
assertEquals(expected, actual);
92+
}
93+
94+
@Test
95+
public void createARecord() throws Exception {
96+
addOneRecord(encoder);
97+
encoder.closeStream();
98+
String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><marc:collection xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\"><marc:record><marc:controlfield tag=\"001\">92005291</marc:controlfield><marc:datafield tag=\"010\" ind1=\" \" ind2=\" \"><marc:subfield code=\"a\">92005291</marc:subfield></marc:datafield></marc:record></marc:collection>";
99+
String actual = resultCollector.toString();
100+
assertEquals(expected, actual);
101+
}
102+
103+
@Test
104+
public void createTwoRecordsInOneCollection() throws Exception {
105+
addOneRecord(encoder);
106+
addOneRecord(encoder);
107+
encoder.closeStream();
108+
109+
String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><marc:collection xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\"><marc:record><marc:controlfield tag=\"001\">92005291</marc:controlfield><marc:datafield tag=\"010\" ind1=\" \" ind2=\" \"><marc:subfield code=\"a\">92005291</marc:subfield></marc:datafield></marc:record><marc:record><marc:controlfield tag=\"001\">92005291</marc:controlfield><marc:datafield tag=\"010\" ind1=\" \" ind2=\" \"><marc:subfield code=\"a\">92005291</marc:subfield></marc:datafield></marc:record></marc:collection>";
110+
String actual = resultCollector.toString();
111+
112+
assertEquals(expected, actual);
113+
}
114+
}

0 commit comments

Comments
 (0)