1+ package org .metafacture .biblio .marc21 ;
2+
3+ import java .util .Collections ;
4+
5+ import org .metafacture .framework .FluxCommand ;
6+ import org .metafacture .framework .MetafactureException ;
7+ import org .metafacture .framework .ObjectReceiver ;
8+ import org .metafacture .framework .StreamReceiver ;
9+ import org .metafacture .framework .annotations .Description ;
10+ import org .metafacture .framework .annotations .In ;
11+ import org .metafacture .framework .annotations .Out ;
12+ import org .metafacture .framework .helpers .DefaultStreamPipe ;
13+
14+ /**
15+ * Encodes a stream into MARCXML.
16+ *
17+ * @author some Jan (Eberhardt) did almost all
18+ * @author Pascal Christoph (dr0i) dug it up again
19+ */
20+
21+ @ Description ("Encodes a stream into MARCXML." )
22+ @ In (StreamReceiver .class )
23+ @ Out (String .class )
24+ @ FluxCommand ("encode-marc21" )
25+ public final class MarcXmlEncoder extends DefaultStreamPipe <ObjectReceiver <String >> {
26+ private static final String ROOT_OPEN = "<marc:collection xmlns:marc=\" http://www.loc.gov/MARC21/slim\" xmlns:xsi=\" http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\" http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\" >" ;
27+ private static final String ROOT_CLOSE = "</marc:collection>" ;
28+
29+ private static final String RECORD_OPEN = "<marc:record>" ;
30+ private static final String RECORD_CLOSE = "</marc:record>" ;
31+
32+ private static final String CONTROLFIELD_OPEN_TEMPLATE = "<marc:controlfield tag=\" %s\" >" ;
33+ private static final String CONTROLFIELD_CLOSE = "</marc:controlfield>" ;
34+
35+ private static final String DATAFIELD_OPEN_TEMPLATE = "<marc:datafield tag=\" %s\" ind1=\" %s\" ind2=\" %s\" >" ;
36+ private static final String DATAFIELD_CLOSE = "</marc:datafield>" ;
37+
38+ private static final String SUBFIELD_OPEN_TEMPLATE = "<marc:subfield code=\" %s\" >" ;
39+ private static final String SUBFIELD_CLOSE = "</marc:subfield>" ;
40+
41+ private static final String NEW_LINE = "\n " ;
42+ private static final String INDENT = "\t " ;
43+
44+ private static final String XML_DECLARATION_TEMPLATE = "<?xml version=\" %s\" encoding=\" %s\" ?>" ;
45+
46+ private final StringBuilder builder ;
47+
48+ private boolean atStreamStart ;
49+
50+ private boolean omitXmlDeclaration ;
51+ private String xmlVersion ;
52+ private String xmlEncoding ;
53+
54+ private String currentEntity ;
55+ private int indentationLevel ;
56+ private boolean formatted ;
57+
58+ public MarcXmlEncoder () {
59+ this .builder = new StringBuilder ();
60+ this .atStreamStart = true ;
61+
62+ this .omitXmlDeclaration = false ;
63+ this .xmlVersion = "1.0" ;
64+ this .xmlEncoding = "UTF-8" ;
65+
66+ this .currentEntity = "" ;
67+
68+ this .indentationLevel = 0 ;
69+ this .formatted = true ;
70+ }
71+
72+ public void omitXmlDeclaration (boolean omitXmlDeclaration ) {
73+ this .omitXmlDeclaration = omitXmlDeclaration ;
74+ }
75+
76+ public void setXmlVersion (String xmlVersion ) {
77+ this .xmlVersion = xmlVersion ;
78+ }
79+
80+ public void setXmlEncoding (String xmlEncoding ) {
81+ this .xmlEncoding = xmlEncoding ;
82+ }
83+
84+ /**
85+ * Formats the resulting xml, by indentation.
86+ *
87+ * @param formatted
88+ * True, if formatting is activated.
89+ */
90+ public void setFormatted (boolean formatted ) {
91+ this .formatted = formatted ;
92+ }
93+
94+ @ Override
95+ public void startRecord (final String identifier ) {
96+ if (atStreamStart ) {
97+ if (!omitXmlDeclaration ) {
98+ writeHeader ();
99+ prettyPrintNewLine ();
100+ }
101+ writeRaw (ROOT_OPEN );
102+ prettyPrintNewLine ();
103+ incrementIndentationLevel ();
104+ }
105+ atStreamStart = false ;
106+
107+ prettyPrintIndentation ();
108+ writeRaw (RECORD_OPEN );
109+ prettyPrintNewLine ();
110+
111+ incrementIndentationLevel ();
112+ }
113+
114+ @ Override
115+ public void endRecord () {
116+ decrementIndentationLevel ();
117+ prettyPrintIndentation ();
118+ writeRaw (RECORD_CLOSE );
119+ prettyPrintNewLine ();
120+ sendAndClearData ();
121+ }
122+
123+ @ Override
124+ public void startEntity (final String name ) {
125+ currentEntity = name ;
126+ if (!name .equals ("leader" )) {
127+ if (name .length () != 5 ) {
128+ String message = String .format ("Entity too short." + "Got a string ('%s') of length %d."
129+ + "Expected a length of 5 (field + indicators)." , name , name .length ());
130+ throw new MetafactureException (message );
131+ }
132+
133+ String tag = name .substring (0 , 3 );
134+ String ind1 = name .substring (3 , 4 );
135+ String ind2 = name .substring (4 , 5 );
136+ prettyPrintIndentation ();
137+ writeRaw (String .format (DATAFIELD_OPEN_TEMPLATE , tag , ind1 , ind2 ));
138+ prettyPrintNewLine ();
139+ incrementIndentationLevel ();
140+ }
141+ }
142+
143+ @ Override
144+ public void endEntity () {
145+ if (!currentEntity .equals ("leader" )) {
146+ decrementIndentationLevel ();
147+ prettyPrintIndentation ();
148+ writeRaw (DATAFIELD_CLOSE );
149+ prettyPrintNewLine ();
150+ }
151+ currentEntity = "" ;
152+ }
153+
154+ @ Override
155+ public void literal (final String name , final String value )
156+ {
157+ if (currentEntity .equals ("" ))
158+ {
159+ prettyPrintIndentation ();
160+ writeRaw (String .format (CONTROLFIELD_OPEN_TEMPLATE , name ));
161+ writeEscaped (value .trim ());
162+ writeRaw (CONTROLFIELD_CLOSE );
163+ prettyPrintNewLine ();
164+ }
165+ else if (!currentEntity .equals ("leader" ))
166+ {
167+ prettyPrintIndentation ();
168+ writeRaw (String .format (SUBFIELD_OPEN_TEMPLATE , name ));
169+ writeEscaped (value .trim ());
170+ writeRaw (SUBFIELD_CLOSE );
171+ prettyPrintNewLine ();
172+ }
173+ else {
174+ }
175+
176+ @ Override
177+ protected void onResetStream () {
178+ if (!atStreamStart ) {
179+ writeFooter ();
180+ }
181+ sendAndClearData ();
182+ atStreamStart = true ;
183+ }
184+
185+ @ Override
186+ protected void onCloseStream () {
187+ writeFooter ();
188+ sendAndClearData ();
189+ }
190+
191+ /** Increments the indentation level by one */
192+ private void incrementIndentationLevel () {
193+ indentationLevel += 1 ;
194+ }
195+
196+ /** Decrements the indentation level by one */
197+ private void decrementIndentationLevel () {
198+ indentationLevel -= 1 ;
199+ }
200+
201+ /** Adds a XML Header */
202+ private void writeHeader () {
203+ writeRaw (String .format (XML_DECLARATION_TEMPLATE , xmlVersion , xmlEncoding ));
204+ }
205+
206+ /** Closes the root tag */
207+ private void writeFooter () {
208+ writeRaw (ROOT_CLOSE );
209+ }
210+
211+ /** Writes a unescaped sequence */
212+ private void writeRaw (final String str ) {
213+ builder .append (str );
214+ }
215+
216+ /** Writes a escaped sequence */
217+ private void writeEscaped (final String str ) {
218+ final int len = str .length ();
219+ for (int i = 0 ; i < len ; ++i ) {
220+ final char c = str .charAt (i );
221+ final String entityName ;
222+ switch (c ) {
223+ case '&' :
224+ entityName = "amp" ;
225+ break ;
226+ case '<' :
227+ entityName = "lt" ;
228+ break ;
229+ case '>' :
230+ entityName = "gt" ;
231+ break ;
232+ case '\'' :
233+ entityName = "apos" ;
234+ break ;
235+ case '"' :
236+ entityName = "quot" ;
237+ break ;
238+ default :
239+ entityName = null ;
240+ break ;
241+ }
242+
243+ if (entityName == null ) {
244+ builder .append (c );
245+ } else {
246+ builder .append ('&' );
247+ builder .append (entityName );
248+ builder .append (';' );
249+ }
250+ }
251+ }
252+
253+ private void prettyPrintIndentation () {
254+ if (formatted ) {
255+ String prefix = String .join ("" , Collections .nCopies (indentationLevel , INDENT ));
256+ builder .append (prefix );
257+ }
258+ }
259+
260+ private void prettyPrintNewLine () {
261+ if (formatted ) {
262+ builder .append (NEW_LINE );
263+ }
264+ }
265+
266+ private void sendAndClearData () {
267+ getReceiver ().process (builder .toString ());
268+ builder .delete (0 , builder .length ());
269+ }
270+ }
0 commit comments