Skip to content

Commit 83ea97d

Browse files
committed
fixed #9
Extracted formatter classes from FormetaDecoder
1 parent 93411e8 commit 83ea97d

File tree

11 files changed

+724
-387
lines changed

11 files changed

+724
-387
lines changed
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
/*
2+
* Copyright 2013 Deutsche Nationalbibliothek
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.culturegraph.mf.formeta.formatter;
17+
18+
import org.culturegraph.mf.formeta.Formeta;
19+
20+
/**
21+
* Base class for formatters.
22+
*
23+
* @author Christoph Böhme.
24+
*
25+
*/
26+
public abstract class AbstractFormatter implements Formatter {
27+
28+
public static final String ESCAPED_CHARS_QUOTED = "\n\r"
29+
+ Formeta.QUOT_CHAR
30+
+ Formeta.ESCAPE_CHAR;
31+
32+
public static final String ESCAPED_CHARS = ESCAPED_CHARS_QUOTED
33+
+ Formeta.GROUP_START
34+
+ Formeta.GROUP_END
35+
+ Formeta.ITEM_SEPARATOR
36+
+ Formeta.NAME_VALUE_SEPARATOR;
37+
38+
private static final int BUFFER_SIZE = 1024;
39+
40+
private final StringBuilder builder = new StringBuilder();
41+
42+
private char[] buffer = new char[BUFFER_SIZE];
43+
44+
@Override
45+
public final void reset() {
46+
builder.delete(0, builder.length());
47+
onReset();
48+
}
49+
50+
@Override
51+
public final String toString() {
52+
return builder.toString();
53+
}
54+
55+
protected final void append(final char ch) {
56+
builder.append(ch);
57+
}
58+
59+
protected final void append(final CharSequence charSeq) {
60+
builder.append(charSeq);
61+
}
62+
63+
protected final void escapeAndAppend(final String str) {
64+
// According to http://stackoverflow.com/a/11876086 it is faster to copy
65+
// the string into a char array than to use charAt():
66+
final int strLen = str.length();
67+
if(strLen > buffer.length) {
68+
buffer = new char[buffer.length * 2];
69+
}
70+
str.getChars(0, strLen, buffer, 0);
71+
72+
final boolean addQuotes = shouldQuoteText(buffer, strLen);
73+
final String charsToEscape;
74+
if (addQuotes) {
75+
builder.append(Formeta.QUOT_CHAR);
76+
charsToEscape = ESCAPED_CHARS_QUOTED;
77+
} else {
78+
charsToEscape = ESCAPED_CHARS;
79+
}
80+
for (int i = 0; i < strLen; ++i) {
81+
final char ch = buffer[i];
82+
if (charsToEscape.indexOf(ch) > -1) {
83+
appendEscapedChar(ch);
84+
} else {
85+
builder.append(ch);
86+
}
87+
}
88+
if (addQuotes) {
89+
builder.append(Formeta.QUOT_CHAR);
90+
}
91+
}
92+
93+
protected void onReset() {
94+
// Default implementation does nothing
95+
}
96+
97+
protected abstract boolean shouldQuoteText(final char[] buffer, final int len);
98+
99+
private void appendEscapedChar(final char ch) {
100+
builder.append(Formeta.ESCAPE_CHAR);
101+
switch (ch) {
102+
case '\n':
103+
builder.append(Formeta.NEWLINE_ESC_SEQ);
104+
break;
105+
case '\r':
106+
builder.append(Formeta.CARRIAGE_RETURN_ESC_SEQ);
107+
break;
108+
default:
109+
builder.append(ch);
110+
}
111+
}
112+
113+
}
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* Copyright 2013 Deutsche Nationalbibliothek
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.culturegraph.mf.formeta.formatter;
17+
18+
import org.culturegraph.mf.formeta.Formeta;
19+
20+
/**
21+
* A formatter for concise output.
22+
*
23+
* @author Christoph Böhme
24+
*
25+
*/
26+
public final class ConciseFormatter extends AbstractFormatter {
27+
28+
private boolean appendItemSeparator;
29+
30+
@Override
31+
public void startGroup(final String name) {
32+
if (appendItemSeparator) {
33+
append(Formeta.ITEM_SEPARATOR);
34+
}
35+
escapeAndAppend(name);
36+
append(Formeta.GROUP_START);
37+
appendItemSeparator = false;
38+
}
39+
40+
@Override
41+
public void endGroup() {
42+
append(Formeta.GROUP_END);
43+
appendItemSeparator = false;
44+
}
45+
46+
@Override
47+
public void literal(final String name, final String value) {
48+
if (appendItemSeparator) {
49+
append(Formeta.ITEM_SEPARATOR);
50+
}
51+
escapeAndAppend(name);
52+
append(Formeta.NAME_VALUE_SEPARATOR);
53+
escapeAndAppend(value);
54+
appendItemSeparator = true;
55+
}
56+
57+
@Override
58+
protected void onReset() {
59+
appendItemSeparator = false;
60+
}
61+
62+
@Override
63+
protected boolean shouldQuoteText(final char[] buffer, final int len) {
64+
return len != 0 && (Formeta.WHITESPACE.indexOf(buffer[0]) > -1 ||
65+
Formeta.WHITESPACE.indexOf(buffer[len - 1]) > -1);
66+
}
67+
68+
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*
2+
* Copyright 2013 Deutsche Nationalbibliothek
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.culturegraph.mf.formeta.formatter;
17+
18+
/**
 * Interface for formatters.
 * <p>
 * A formatter receives a sequence of group and literal events and turns them
 * into text.
 *
 * @author Christoph Böhme
 *
 */
public interface Formatter {

    /**
     * Returns the formatter to its initial state so that it can be used for
     * a new sequence of events.
     */
    void reset();

    /**
     * Opens a new group.
     *
     * @param name name of the group
     */
    void startGroup(String name);

    /**
     * Closes the group which was opened most recently.
     */
    void endGroup();

    /**
     * Emits a name-value pair.
     *
     * @param name  name of the literal
     * @param value value of the literal
     */
    void literal(String name, String value);

}
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/*
2+
* Copyright 2013 Deutsche Nationalbibliothek
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.culturegraph.mf.formeta.formatter;
17+
18+
/**
19+
* Output styles for formeta.
20+
*
21+
* @author Christoph Böhme
22+
*/
23+
public enum FormatterStyle {
24+
/**
25+
* Dense and concise output style with minimal quotation and
26+
* item separation and without any unnecessary whitespace.
27+
* Good for automatic processing.
28+
*/
29+
CONCISE {
30+
@Override
31+
public Formatter createFormatter() {
32+
return new ConciseFormatter();
33+
}
34+
},
35+
36+
/**
37+
* Output style which aims to be easy to read. Inserts
38+
* additional whitespace and item separators. Uses
39+
* quotation marks extensively.
40+
*/
41+
VERBOSE {
42+
@Override
43+
public Formatter createFormatter() {
44+
return new VerboseFormatter();
45+
}
46+
},
47+
48+
/**
49+
* Similar to the {@code VERBOSE} style but additionally
50+
* adds line breaks and indents to support readability.
51+
*/
52+
MULTILINE {
53+
@Override
54+
public Formatter createFormatter() {
55+
return new MultilineFormatter();
56+
}
57+
};
58+
59+
public abstract Formatter createFormatter();
60+
}
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
/*
2+
* Copyright 2013 Deutsche Nationalbibliothek
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.culturegraph.mf.formeta.formatter;
17+
18+
import org.culturegraph.mf.formeta.Formeta;
19+
20+
/**
21+
* A formatter for multiline output.
22+
*
23+
* @author Christoph Böhme
24+
*
25+
*/
26+
public final class MultilineFormatter extends AbstractFormatter {
27+
28+
public static final String INDENT = "\t";
29+
30+
private static final String GROUP_START = " " + Formeta.GROUP_START;
31+
private static final String NAME_VALUE_SEPARATOR = Formeta.NAME_VALUE_SEPARATOR + " ";
32+
33+
private final StringBuilder indent = new StringBuilder();
34+
35+
private boolean appendItemSeparator;
36+
private boolean firstItem;
37+
38+
public MultilineFormatter() {
39+
super();
40+
onReset();
41+
}
42+
43+
@Override
44+
public void startGroup(final String name) {
45+
if (appendItemSeparator) {
46+
append(Formeta.ITEM_SEPARATOR);
47+
}
48+
if (!firstItem) {
49+
append(indent);
50+
}
51+
escapeAndAppend(name);
52+
append(GROUP_START);
53+
54+
indent.append(INDENT);
55+
appendItemSeparator = false;
56+
firstItem = false;
57+
}
58+
59+
@Override
60+
public void endGroup() {
61+
indent.delete(indent.length() - INDENT.length(), indent.length());
62+
63+
append(indent);
64+
append(Formeta.GROUP_END);
65+
appendItemSeparator = true;
66+
}
67+
68+
@Override
69+
public void literal(final String name, final String value) {
70+
if (appendItemSeparator) {
71+
append(Formeta.ITEM_SEPARATOR);
72+
}
73+
if (!firstItem) {
74+
append(indent);
75+
}
76+
escapeAndAppend(name);
77+
append(NAME_VALUE_SEPARATOR);
78+
escapeAndAppend(value);
79+
appendItemSeparator = true;
80+
firstItem = false;
81+
}
82+
83+
@Override
84+
protected void onReset() {
85+
indent.delete(0, indent.length());
86+
indent.append('\n');
87+
appendItemSeparator = false;
88+
firstItem = true;
89+
}
90+
91+
@Override
92+
protected boolean shouldQuoteText(final char[] buffer, final int len) {
93+
return true;
94+
}
95+
96+
}

0 commit comments

Comments
 (0)