Skip to content

Commit 7e8c13a

Browse files
author
Markus M. Geipel
committed
Merge pull request #114 from cboehme/issue-98
Issue 98: SimpleXmlWriter should only call process once per record
2 parents db3bae2 + f264396 commit 7e8c13a

File tree

2 files changed

+168
-149
lines changed

2 files changed

+168
-149
lines changed

src/main/java/org/culturegraph/mf/stream/sink/SimpleXmlWriter.java

Lines changed: 149 additions & 131 deletions
Original file line numberDiff line numberDiff line change
@@ -33,91 +33,95 @@
3333
import org.culturegraph.mf.util.ResourceUtil;
3434

3535
/**
36-
*
36+
*
3737
* writes a stream to XML
38-
*
39-
* @author Markus Michael Geipel
40-
*
38+
*
39+
* @author Markus Michael Geipel, Christoph Böhme
40+
*
4141
*/
4242
@Description("writes a stream to xml")
4343
@In(StreamReceiver.class)
4444
@Out(String.class)
4545
public final class SimpleXmlWriter extends DefaultStreamPipe<ObjectReceiver<String>> {
46+
4647
public static final String ATTRIBUTE_MARKER = "~";
47-
// public static final String TEXT_CONTENT_MARKER = "_text";
4848
public static final String NAMESPACES = "namespaces";
49-
public static final String NEW_LINE = "\n";
5049

51-
private Element element;
50+
public static final String DEFAULT_ROOT_TAG = "records";
51+
public static final String DEFAULT_RECORD_TAG = "record";
52+
53+
private static final String NEW_LINE = "\n";
54+
private static final String INDENT = "\t";
55+
56+
private static final String BEGIN_ATTRIBUTE = "=\"";
57+
private static final String END_ATTRIBUTE = "\"";
58+
private static final String BEGIN_OPEN_ELEMENT = "<";
59+
private static final String END_OPEN_ELEMENT = ">";
60+
private static final String END_EMPTY_ELEMENT = " />";
61+
private static final String BEGIN_CLOSE_ELEMENT = "</";
62+
private static final String END_CLOSE_ELEMENT = ">";
63+
64+
private static final String XML_HEADER = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
65+
private static final String XMLNS_MARKER = " xmlns:";
66+
67+
private final StringBuilder builder = new StringBuilder();
68+
69+
private String rootTag = DEFAULT_ROOT_TAG;
70+
private String recordTag = DEFAULT_RECORD_TAG;
5271
private Map<String, String> namespaces = new HashMap<String, String>();
53-
private String recordTag = "record";
54-
private String rootTag = "records";
55-
private boolean start = true;
56-
private boolean separateRoots;
5772
private boolean writeXmlHeader = true;
73+
private boolean separateRoots;
74+
75+
private Element element;
76+
private boolean atStreamStart = true;
5877

5978
public void setRootTag(final String rootTag) {
6079
this.rootTag = rootTag;
6180
}
6281

63-
public void setWriteXmlHeader(final boolean writeXmlHeader) {
64-
this.writeXmlHeader = writeXmlHeader;
65-
}
66-
67-
public void setSeparateRoots(final boolean separateRoots) {
68-
this.separateRoots = separateRoots;
82+
public void setRecordTag(final String tag) {
83+
recordTag = tag;
6984
}
7085

7186
public void setNamespaceFile(final String file) {
7287
final Properties properties = ResourceUtil.loadProperties(file);
73-
for (Entry<Object, Object> entry : properties.entrySet()) {
88+
for (final Entry<Object, Object> entry : properties.entrySet()) {
7489
namespaces.put(entry.getKey().toString(), entry.getValue().toString());
7590
}
7691
}
7792

78-
private void writeHeader() {
79-
final StringBuilder builder = new StringBuilder();
93+
public void setWriteXmlHeader(final boolean writeXmlHeader) {
94+
this.writeXmlHeader = writeXmlHeader;
95+
}
8096

81-
if (writeXmlHeader) {
82-
builder.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
83-
}
97+
public void setSeparateRoots(final boolean separateRoots) {
98+
this.separateRoots = separateRoots;
99+
}
84100

85-
builder.append("<");
86-
builder.append(rootTag);
87-
for (Entry<String, String> entry : namespaces.entrySet()) {
88-
builder.append(" xmlns:");
89-
builder.append(entry.getKey());
90-
builder.append("=\"");
91-
escape(builder, entry.getValue());
92-
builder.append("\"");
93-
}
94-
builder.append(">");
95-
getReceiver().process(builder.toString());
96-
start = false;
101+
public void configure(final MultiMap multimap) {
102+
this.namespaces = multimap.getMap(NAMESPACES);
97103
}
98104

99105
@Override
100106
public void startRecord(final String identifier) {
101-
if (separateRoots || start) {
107+
if (separateRoots) {
108+
writeHeader();
109+
} else if (atStreamStart) {
102110
writeHeader();
111+
sendAndClearData();
103112
}
113+
atStreamStart = false;
114+
104115
element = new Element(recordTag);
105116
}
106117

107118
@Override
108119
public void endRecord() {
109-
if (recordTag.isEmpty()) {
110-
final StringBuilder builder = new StringBuilder();
111-
for (Element child : element.getChildren()) {
112-
child.writeToStringBuilder(builder, 1);
113-
}
114-
getReceiver().process(builder.toString());
115-
} else {
116-
getReceiver().process(element.toString());
117-
}
120+
element.writeElement(builder, 1);
118121
if (separateRoots) {
119122
writeFooter();
120123
}
124+
sendAndClearData();
121125
}
122126

123127
@Override
@@ -137,42 +141,106 @@ public void literal(final String name, final String value) {
137141
} else if (name.startsWith(ATTRIBUTE_MARKER)) {
138142
element.addAttribute(name.substring(1), value);
139143
} else {
140-
final Element temp = element.createChild(name);
141-
temp.setText(value);
144+
element.createChild(name).setText(value);
142145
}
143146
}
144147

145-
public void configure(final MultiMap multimap) {
146-
this.namespaces = multimap.getMap(NAMESPACES);
147-
}
148-
149-
public void setRecordTag(final String tag) {
150-
recordTag = tag;
148+
@Override
149+
protected void onResetStream() {
150+
writeFooter();
151+
sendAndClearData();
152+
atStreamStart = true;
151153
}
152154

153155
@Override
154156
protected void onCloseStream() {
155157
if (!separateRoots) {
156158
writeFooter();
159+
sendAndClearData();
157160
}
158161
}
159162

163+
private void sendAndClearData() {
164+
getReceiver().process(builder.toString());
165+
builder.delete(0, builder.length());
166+
}
167+
168+
private void writeHeader() {
169+
if (writeXmlHeader) {
170+
builder.append(XML_HEADER);
171+
}
172+
173+
builder.append(BEGIN_OPEN_ELEMENT);
174+
builder.append(rootTag);
175+
for (final Entry<String, String> entry : namespaces.entrySet()) {
176+
builder.append(XMLNS_MARKER);
177+
builder.append(entry.getKey());
178+
builder.append(BEGIN_ATTRIBUTE);
179+
writeEscaped(builder, entry.getValue());
180+
builder.append(END_ATTRIBUTE);
181+
}
182+
builder.append(END_OPEN_ELEMENT);
183+
}
184+
160185
private void writeFooter() {
161-
getReceiver().process("</" + rootTag + ">");
186+
builder.append(NEW_LINE);
187+
builder.append(BEGIN_CLOSE_ELEMENT);
188+
builder.append(rootTag);
189+
builder.append(END_CLOSE_ELEMENT);
190+
}
191+
192+
protected static void writeEscaped(final StringBuilder builder, final String str) {
193+
194+
final int len = str.length();
195+
for (int i = 0; i < len; ++i) {
196+
final char c = str.charAt(i);
197+
final String entityName;
198+
switch (c) {
199+
case '&':
200+
entityName = "amp";
201+
break;
202+
case '<':
203+
entityName = "lt";
204+
break;
205+
case '>':
206+
entityName = "gt";
207+
break;
208+
case '\'':
209+
entityName = "apos";
210+
break;
211+
case '"':
212+
entityName = "quot";
213+
break;
214+
default:
215+
entityName = null;
216+
break;
217+
}
218+
219+
if (entityName == null) {
220+
builder.append(c);
221+
} else {
222+
builder.append('&');
223+
builder.append(entityName);
224+
builder.append(';');
225+
}
226+
}
162227
}
163228

164229
/**
230+
* An XML element.
165231
*
166232
*/
167233
private static final class Element {
234+
168235
private static final List<Element> NO_CHILDREN = Collections.emptyList();
169236

170237
private final StringBuilder attributes = new StringBuilder();
171-
private String text = "";
172-
private List<Element> children = NO_CHILDREN;
173238
private final Element parent;
174239
private final String name;
175240

241+
private String text = "";
242+
private List<Element> children = NO_CHILDREN;
243+
176244
public Element(final String name) {
177245
this.name = name;
178246
this.parent = null;
@@ -183,16 +251,12 @@ private Element(final String name, final Element parent) {
183251
this.parent = parent;
184252
}
185253

186-
public List<Element> getChildren() {
187-
return children;
188-
}
189-
190254
public void addAttribute(final String name, final String value) {
191255
attributes.append(" ");
192256
attributes.append(name);
193-
attributes.append("=\"");
194-
escape(attributes, value);
195-
attributes.append("\"");
257+
attributes.append(BEGIN_ATTRIBUTE);
258+
writeEscaped(attributes, value);
259+
attributes.append(END_ATTRIBUTE);
196260
}
197261

198262
public void setText(final String text) {
@@ -212,90 +276,44 @@ public Element getParent() {
212276
return parent;
213277
}
214278

215-
@Override
216-
public String toString() {
217-
final StringBuilder builder = new StringBuilder();
218-
writeToStringBuilder(builder, 1);
219-
return builder.toString();
220-
}
221-
222-
public void writeToStringBuilder(final StringBuilder builder, final int indent) {
223-
builder.append(NEW_LINE);
224-
indent(builder, indent);
225-
builder.append("<");
226-
builder.append(name);
227-
builder.append(attributes);
228-
if (text.isEmpty() && children.isEmpty()) {
229-
builder.append(" /");
279+
public void writeElement(final StringBuilder builder, final int indent) {
280+
if (!name.isEmpty()) {
281+
builder.append(NEW_LINE);
282+
writeIndent(builder, indent);
283+
builder.append(BEGIN_OPEN_ELEMENT);
284+
builder.append(name);
285+
builder.append(attributes);
286+
if (text.isEmpty() && children.isEmpty()) {
287+
builder.append(END_EMPTY_ELEMENT);
288+
return;
289+
}
290+
builder.append(END_OPEN_ELEMENT);
230291
}
231292

232-
builder.append(">");
293+
writeEscaped(builder, text);
233294

234-
escape(builder, text);
235-
236-
for (Element element : children) {
237-
element.writeToStringBuilder(builder, indent + 1);
295+
for (final Element element : children) {
296+
element.writeElement(builder, indent + 1);
238297
}
298+
239299
if (text.isEmpty() && !children.isEmpty()) {
240300
builder.append(NEW_LINE);
241-
indent(builder, indent);
301+
writeIndent(builder, indent);
242302
}
243303

244-
if (!text.isEmpty() || !children.isEmpty()) {
245-
builder.append("</");
304+
if (!name.isEmpty()) {
305+
builder.append(BEGIN_CLOSE_ELEMENT);
246306
builder.append(name);
247-
builder.append(">");
307+
builder.append(END_CLOSE_ELEMENT);
248308
}
249309
}
250310

251-
private static void indent(final StringBuilder builder, final int indent) {
311+
private static void writeIndent(final StringBuilder builder, final int indent) {
252312
for (int i = 0; i < indent; ++i) {
253-
builder.append("\t");
313+
builder.append(INDENT);
254314
}
255315
}
256-
}
257316

258-
@Override
259-
protected void onResetStream() {
260-
writeFooter();
261-
start = true;
262-
}
263-
264-
protected static void escape(final StringBuilder builder, final String str) {
265-
266-
final int len = str.length();
267-
for (int i = 0; i < len; ++i) {
268-
final char c = str.charAt(i);
269-
final String entityName;
270-
switch (c) {
271-
case '&':
272-
entityName = "amp";
273-
break;
274-
case '<':
275-
entityName = "lt";
276-
break;
277-
case '>':
278-
entityName = "gt";
279-
break;
280-
case '\'':
281-
entityName = "apos";
282-
break;
283-
case '"':
284-
entityName = "quot";
285-
break;
286-
default:
287-
entityName = null;
288-
break;
289-
}
290-
291-
if (entityName == null) {
292-
builder.append(c);
293-
} else {
294-
builder.append('&');
295-
builder.append(entityName);
296-
builder.append(';');
297-
}
298-
}
299317
}
300318

301319
}

0 commit comments

Comments
 (0)