Skip to content

Commit 7e65dc4

Browse files
authored
Merge pull request #301 from metafacture/300-addMarcXmlEncoder
Add MarcXmlEncoder
2 parents 72bf09f + 6dab118 commit 7e65dc4

File tree

9 files changed

+574
-52
lines changed

9 files changed

+574
-52
lines changed

metafacture-biblio/src/main/java/org/metafacture/biblio/iso2709/Record.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,14 @@ public String getRecordId() {
156156
return buffer.stringAt(dataStart, dataLength, charset);
157157
}
158158

159+
/**
160+
* Returns the record leader.
161+
*
162+
* @return a string which is the record leader.
163+
*/
164+
public String getLabel() {
165+
// The leader is handed out as the string form of the internal label object.
return label.toString();
166+
}
159167
/**
160168
* Iterates through all fields in the record and calls the appropriate method
161169
* on the supplied {@link FieldHandler} instance.

metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/Marc21Decoder.java

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ public final class Marc21Decoder
140140
private final FieldHandler fieldHandler = new Marc21Handler();
141141

142142
private boolean ignoreMissingId;
143+
private boolean emitLeaderAsWhole;
143144

144145
/**
145146
* Controls whether the decoder aborts processing if a record has no
@@ -164,6 +165,24 @@ public boolean getIgnoreMissingId() {
164165
return ignoreMissingId;
165166
}
166167

168+
/**
169+
* Controls whether the Record Leader should be emitted as a whole instead of
170+
* extracting the bibliographic information in the record leader.
* <p>
* When enabled, the leader entity contains a single literal, named like the
* leader entity itself, whose value is the complete leader string of the
* record. When disabled, the individual leader positions are emitted as
* separate literals inside the leader entity.
171+
*
172+
* @see <a href="http://www.loc.gov/marc/bibliographic/bdleader.html">MARC 21
173+
* Standard: Record Leader</a>
174+
*
175+
* @param emitLeaderAsWhole
176+
* true if the leader should be emitted as a whole.
177+
*/
178+
public void setEmitLeaderAsWhole(final boolean emitLeaderAsWhole) {
179+
this.emitLeaderAsWhole = emitLeaderAsWhole;
180+
}
181+
182+
/**
* Indicates whether the record leader is emitted as a single literal holding
* the whole leader string rather than as separate literals.
*
* @return the current value of the emitLeaderAsWhole flag.
*/
public boolean getEmitLeaderAsWhole() {
183+
return emitLeaderAsWhole;
184+
}
185+
167186
@Override
168187
public void process(final String obj) {
169188
if (obj.isEmpty()) {
@@ -207,9 +226,12 @@ private String tryGetRecordId(final Record record) {
207226
}
208227

209228
private void emitLeader(final Record record) {
229+
getReceiver().startEntity(Marc21EventNames.LEADER_ENTITY);
230+
if (emitLeaderAsWhole){
231+
getReceiver().literal(Marc21EventNames.LEADER_ENTITY, record.getLabel());
232+
}else {
210233
final char[] implCodes = record.getImplCodes();
211234
final char[] systemChars = record.getSystemChars();
212-
getReceiver().startEntity(Marc21EventNames.LEADER_ENTITY);
213235
getReceiver().literal(Marc21EventNames.RECORD_STATUS_LITERAL, String.valueOf(
214236
record.getRecordStatus()));
215237
getReceiver().literal(Marc21EventNames.RECORD_TYPE_LITERAL, String.valueOf(
@@ -226,6 +248,7 @@ private void emitLeader(final Record record) {
226248
systemChars[Marc21Constants.CATALOGING_FORM_INDEX]));
227249
getReceiver().literal(Marc21EventNames.MULTIPART_LEVEL_LITERAL, String.valueOf(
228250
systemChars[Marc21Constants.MULTIPART_LEVEL_INDEX]));
251+
}
229252
getReceiver().endEntity();
230253
}
231254

Lines changed: 259 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,259 @@
1+
/* Copyright 2019 Pascal Christoph (hbz) and others
2+
*
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
package org.metafacture.biblio.marc21;
17+
18+
import java.util.Collections;
19+
20+
import org.metafacture.commons.XmlUtil;
21+
import org.metafacture.framework.FluxCommand;
22+
import org.metafacture.framework.MetafactureException;
23+
import org.metafacture.framework.ObjectReceiver;
24+
import org.metafacture.framework.StreamReceiver;
25+
import org.metafacture.framework.annotations.Description;
26+
import org.metafacture.framework.annotations.In;
27+
import org.metafacture.framework.annotations.Out;
28+
import org.metafacture.framework.helpers.DefaultStreamPipe;
29+
30+
/**
31+
* Encodes a stream into MARCXML.
32+
*
33+
* @author some Jan (Eberhardt) did almost all
34+
* @author Pascal Christoph (dr0i) dug it up again
35+
*/
36+
37+
@Description("Encodes a stream into MARCXML.")
38+
@In(StreamReceiver.class)
39+
@Out(String.class)
40+
@FluxCommand("encode-marcxml")
41+
public final class MarcXmlEncoder extends DefaultStreamPipe<ObjectReceiver<String>> {
42+
private static final String ROOT_OPEN = "<marc:collection xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\">";
43+
private static final String ROOT_CLOSE = "</marc:collection>";
44+
45+
private static final String RECORD_OPEN = "<marc:record>";
46+
private static final String RECORD_CLOSE = "</marc:record>";
47+
48+
private static final String CONTROLFIELD_OPEN_TEMPLATE = "<marc:controlfield tag=\"%s\">";
49+
private static final String CONTROLFIELD_CLOSE = "</marc:controlfield>";
50+
51+
private static final String DATAFIELD_OPEN_TEMPLATE = "<marc:datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\">";
52+
private static final String DATAFIELD_CLOSE = "</marc:datafield>";
53+
54+
private static final String SUBFIELD_OPEN_TEMPLATE = "<marc:subfield code=\"%s\">";
55+
private static final String SUBFIELD_CLOSE = "</marc:subfield>";
56+
57+
private static final String LEADER_OPEN_TEMPLATE = "<marc:leader>";
58+
private static final String LEADER_CLOSE_TEMPLATE = "</marc:leader>";
59+
60+
private static final String NEW_LINE = "\n";
61+
private static final String INDENT = "\t";
62+
63+
private static final String XML_DECLARATION_TEMPLATE = "<?xml version=\"%s\" encoding=\"%s\"?>";
64+
65+
private final StringBuilder builder;
66+
67+
private boolean atStreamStart;
68+
69+
private boolean omitXmlDeclaration;
70+
private String xmlVersion;
71+
private String xmlEncoding;
72+
73+
private String currentEntity;
74+
private int indentationLevel;
75+
private boolean formatted;
76+
77+
public MarcXmlEncoder() {
78+
this.builder = new StringBuilder();
79+
this.atStreamStart = true;
80+
81+
this.omitXmlDeclaration = false;
82+
this.xmlVersion = "1.0";
83+
this.xmlEncoding = "UTF-8";
84+
85+
this.currentEntity = "";
86+
87+
this.indentationLevel = 0;
88+
this.formatted = true;
89+
}
90+
91+
public void omitXmlDeclaration(boolean omitXmlDeclaration) {
92+
this.omitXmlDeclaration = omitXmlDeclaration;
93+
}
94+
95+
public void setXmlVersion(String xmlVersion) {
96+
this.xmlVersion = xmlVersion;
97+
}
98+
99+
public void setXmlEncoding(String xmlEncoding) {
100+
this.xmlEncoding = xmlEncoding;
101+
}
102+
103+
/**
104+
* Formats the resulting xml, by indentation.
105+
*
106+
* @param formatted
107+
* True, if formatting is activated.
108+
*/
109+
public void setFormatted(boolean formatted) {
110+
this.formatted = formatted;
111+
}
112+
113+
@Override
114+
public void startRecord(final String identifier) {
115+
if (atStreamStart) {
116+
if (!omitXmlDeclaration) {
117+
writeHeader();
118+
prettyPrintNewLine();
119+
}
120+
writeRaw(ROOT_OPEN);
121+
prettyPrintNewLine();
122+
incrementIndentationLevel();
123+
}
124+
atStreamStart = false;
125+
126+
prettyPrintIndentation();
127+
writeRaw(RECORD_OPEN);
128+
prettyPrintNewLine();
129+
130+
incrementIndentationLevel();
131+
}
132+
133+
@Override
134+
public void endRecord() {
135+
decrementIndentationLevel();
136+
prettyPrintIndentation();
137+
writeRaw(RECORD_CLOSE);
138+
prettyPrintNewLine();
139+
sendAndClearData();
140+
}
141+
142+
@Override
143+
public void startEntity(final String name) {
144+
currentEntity = name;
145+
if (!name.equals(Marc21EventNames.LEADER_ENTITY)) {
146+
if (name.length() != 5) {
147+
String message = String.format("Entity too short." + "Got a string ('%s') of length %d."
148+
+ "Expected a length of 5 (field + indicators).", name, name.length());
149+
throw new MetafactureException(message);
150+
}
151+
152+
String tag = name.substring(0, 3);
153+
String ind1 = name.substring(3, 4);
154+
String ind2 = name.substring(4, 5);
155+
prettyPrintIndentation();
156+
writeRaw(String.format(DATAFIELD_OPEN_TEMPLATE, tag, ind1, ind2));
157+
prettyPrintNewLine();
158+
incrementIndentationLevel();
159+
}
160+
}
161+
162+
@Override
163+
public void endEntity() {
164+
if (!currentEntity.equals(Marc21EventNames.LEADER_ENTITY)) {
165+
decrementIndentationLevel();
166+
prettyPrintIndentation();
167+
writeRaw(DATAFIELD_CLOSE);
168+
prettyPrintNewLine();
169+
}
170+
currentEntity = "";
171+
}
172+
173+
@Override
174+
public void literal(final String name, final String value) {
175+
if (currentEntity.equals("")) {
176+
prettyPrintIndentation();
177+
writeRaw(String.format(CONTROLFIELD_OPEN_TEMPLATE, name));
178+
writeEscaped(value.trim());
179+
writeRaw(CONTROLFIELD_CLOSE);
180+
prettyPrintNewLine();
181+
} else if (!currentEntity.equals(Marc21EventNames.LEADER_ENTITY)) {
182+
prettyPrintIndentation();
183+
writeRaw(String.format(SUBFIELD_OPEN_TEMPLATE, name));
184+
writeEscaped(value.trim());
185+
writeRaw(SUBFIELD_CLOSE);
186+
prettyPrintNewLine();
187+
} else {
188+
if (name.equals(Marc21EventNames.LEADER_ENTITY)) {
189+
prettyPrintIndentation();
190+
writeRaw(LEADER_OPEN_TEMPLATE + value + LEADER_CLOSE_TEMPLATE);
191+
prettyPrintNewLine();
192+
}
193+
}
194+
195+
}
196+
197+
@Override
198+
protected void onResetStream() {
199+
if (!atStreamStart) {
200+
writeFooter();
201+
}
202+
sendAndClearData();
203+
atStreamStart = true;
204+
}
205+
206+
@Override
207+
protected void onCloseStream() {
208+
writeFooter();
209+
sendAndClearData();
210+
}
211+
212+
/** Increments the indentation level by one */
213+
private void incrementIndentationLevel() {
214+
indentationLevel += 1;
215+
}
216+
217+
/** Decrements the indentation level by one */
218+
private void decrementIndentationLevel() {
219+
indentationLevel -= 1;
220+
}
221+
222+
/** Adds a XML Header */
223+
private void writeHeader() {
224+
writeRaw(String.format(XML_DECLARATION_TEMPLATE, xmlVersion, xmlEncoding));
225+
}
226+
227+
/** Closes the root tag */
228+
private void writeFooter() {
229+
writeRaw(ROOT_CLOSE);
230+
}
231+
232+
/** Writes a unescaped sequence */
233+
private void writeRaw(final String str) {
234+
builder.append(str);
235+
}
236+
237+
/** Writes a escaped sequence */
238+
private void writeEscaped(final String str) {
239+
builder.append(XmlUtil.escape(str, false));
240+
}
241+
242+
private void prettyPrintIndentation() {
243+
if (formatted) {
244+
String prefix = String.join("", Collections.nCopies(indentationLevel, INDENT));
245+
builder.append(prefix);
246+
}
247+
}
248+
249+
private void prettyPrintNewLine() {
250+
if (formatted) {
251+
builder.append(NEW_LINE);
252+
}
253+
}
254+
255+
private void sendAndClearData() {
256+
getReceiver().process(builder.toString());
257+
builder.delete(0, builder.length());
258+
}
259+
}

metafacture-biblio/src/main/resources/flux-commands.properties

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
decode-marc21 org.metafacture.biblio.marc21.Marc21Decoder
1717
encode-marc21 org.metafacture.biblio.marc21.Marc21Encoder
1818
handle-marcxml org.metafacture.biblio.marc21.MarcXmlHandler
19+
encode-marcxml org.metafacture.biblio.marc21.MarcXmlEncoder
1920

2021
decode-pica org.metafacture.biblio.pica.PicaDecoder
2122
encode-pica org.metafacture.biblio.pica.PicaEncoder

0 commit comments

Comments
 (0)