Skip to content

Commit 524473b

Browse files
committed
Merge branch 'fix-issue-161' of https://github.com/cboehme/metafacture-core into cboehme-fix-issue-161
2 parents b1e1172 + 5c8002e commit 524473b

File tree

5 files changed

+188
-98
lines changed

5 files changed

+188
-98
lines changed

src/main/java/org/culturegraph/mf/formeta/formatter/AbstractFormatter.java

Lines changed: 26 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -16,68 +16,66 @@
1616
package org.culturegraph.mf.formeta.formatter;
1717

1818
import org.culturegraph.mf.formeta.Formeta;
19+
import org.culturegraph.mf.util.StringUtil;
1920

2021
/**
2122
* Base class for formatters.
22-
*
23+
*
2324
* @author Christoph Böhme.
24-
*
25+
*
2526
*/
2627
public abstract class AbstractFormatter implements Formatter {
27-
28-
public static final String ESCAPED_CHARS_QUOTED = "\n\r"
29-
+ Formeta.QUOT_CHAR
28+
29+
public static final String ESCAPED_CHARS_QUOTED = "\n\r"
30+
+ Formeta.QUOT_CHAR
3031
+ Formeta.ESCAPE_CHAR;
31-
32-
public static final String ESCAPED_CHARS = ESCAPED_CHARS_QUOTED
33-
+ Formeta.GROUP_START
34-
+ Formeta.GROUP_END
35-
+ Formeta.ITEM_SEPARATOR
32+
33+
public static final String ESCAPED_CHARS = ESCAPED_CHARS_QUOTED
34+
+ Formeta.GROUP_START
35+
+ Formeta.GROUP_END
36+
+ Formeta.ITEM_SEPARATOR
3637
+ Formeta.NAME_VALUE_SEPARATOR;
3738

38-
private static final int BUFFER_SIZE = 1024;
39+
protected static final int BUFFER_SIZE = 1024;
3940

4041
private final StringBuilder builder = new StringBuilder();
41-
42+
4243
private char[] buffer = new char[BUFFER_SIZE];
43-
44+
4445
@Override
4546
public final void reset() {
4647
builder.delete(0, builder.length());
4748
onReset();
4849
}
49-
50+
5051
@Override
5152
public final String toString() {
5253
return builder.toString();
5354
}
54-
55+
5556
protected final void append(final char ch) {
5657
builder.append(ch);
5758
}
58-
59+
5960
protected final void append(final CharSequence charSeq) {
6061
builder.append(charSeq);
6162
}
6263

6364
protected final void escapeAndAppend(final String str) {
6465
// According to http://stackoverflow.com/a/11876086 it is faster to copy
65-
// the string into a char array than to use charAt():
66-
final int strLen = str.length();
67-
if(strLen > buffer.length) {
68-
buffer = new char[buffer.length * 2];
69-
}
70-
str.getChars(0, strLen, buffer, 0);
71-
72-
final boolean addQuotes = shouldQuoteText(buffer, strLen);
66+
// a string into a char array than to use charAt():
67+
buffer = StringUtil.copyToBuffer(str, buffer);
68+
final int bufferLen = str.length();
69+
70+
final boolean addQuotes = shouldQuoteText(buffer, bufferLen);
7371
final String charsToEscape;
7472
if (addQuotes) {
7573
builder.append(Formeta.QUOT_CHAR);
7674
charsToEscape = ESCAPED_CHARS_QUOTED;
7775
} else {
7876
charsToEscape = ESCAPED_CHARS;
7977
}
80-
for (int i = 0; i < strLen; ++i) {
78+
for (int i = 0; i < bufferLen; ++i) {
8179
final char ch = buffer[i];
8280
if (charsToEscape.indexOf(ch) > -1) {
8381
appendEscapedChar(ch);
@@ -89,11 +87,11 @@ protected final void escapeAndAppend(final String str) {
8987
builder.append(Formeta.QUOT_CHAR);
9088
}
9189
}
92-
90+
9391
protected void onReset() {
9492
// Default implementation does nothing
9593
}
96-
94+
9795
protected abstract boolean shouldQuoteText(final char[] buffer, final int len);
9896

9997
private void appendEscapedChar(final char ch) {
@@ -109,5 +107,5 @@ private void appendEscapedChar(final char ch) {
109107
builder.append(ch);
110108
}
111109
}
112-
110+
113111
}

src/main/java/org/culturegraph/mf/formeta/parser/FormetaParser.java

Lines changed: 23 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,11 @@
1616
package org.culturegraph.mf.formeta.parser;
1717

1818
import org.culturegraph.mf.exceptions.FormatException;
19+
import org.culturegraph.mf.util.StringUtil;
1920

2021
/**
2122
* A parser for the formeta metadata serialisation format.
22-
*
23+
*
2324
* @author Christoph Böhme
2425
*
2526
*/
@@ -32,73 +33,70 @@ public final class FormetaParser {
3233

3334
private static final int BUFFER_SIZE = 1024 * 1024;
3435

35-
private char[] buffer = new char[BUFFER_SIZE];
36+
private char[] buffer = new char[BUFFER_SIZE];
3637
private final StructureParserContext structureParserContext = new StructureParserContext();
3738

3839
public void setEmitter(final Emitter emitter) {
3940
structureParserContext.setEmitter(emitter);
4041
}
41-
42+
4243
public Emitter getEmitter() {
4344
return structureParserContext.getEmitter();
4445
}
4546

4647
public void parse(final String data) {
4748
assert structureParserContext.getEmitter() != null: "No emitter set";
48-
49-
// According to http://stackoverflow.com/a/11876086 it is faster to copy
50-
// the string into a char array than to use charAt():
51-
final int recordLen = data.length();
52-
if(recordLen > buffer.length) {
53-
buffer = new char[buffer.length * 2];
54-
}
55-
data.getChars(0, recordLen, buffer, 0);
56-
49+
50+
// According to http://stackoverflow.com/a/11876086 it is faster to copy
51+
// a string into a char array than to use charAt():
52+
buffer = StringUtil.copyToBuffer(data, buffer);
53+
final int bufferLen = data.length();
54+
5755
structureParserContext.reset();
5856
StructureParserState state = StructureParserState.ITEM_NAME;
5957
int i = 0;
6058
try {
61-
for (; i < recordLen; ++i) {
59+
for (; i < bufferLen; ++i) {
6260
state = state.processChar(buffer[i], structureParserContext);
6361
}
64-
} catch (FormatException e) {
65-
final String errorMsg = "Parsing error at position "
62+
} catch (final FormatException e) {
63+
final String errorMsg = "Parsing error at position "
6664
+ (i + 1) + ": "
67-
+ getErrorSnippet(data, i) + ", "
65+
+ getErrorSnippet(data, i) + ", "
6866
+ e.getMessage();
6967
throw new FormatException(errorMsg, e);
7068
}
7169
try {
7270
state.endOfInput(structureParserContext);
73-
} catch (FormatException e) {
71+
} catch (final FormatException e) {
7472
throw new FormatException("Parsing error: " + e.getMessage(), e);
7573
}
7674
}
77-
75+
7876
/**
79-
* Extracts a text snippet from the record for showing the position at
80-
* which an error occurred. The exact position additionally highlighted
77+
* Extracts a text snippet from the record for showing the position at
78+
* which an error occurred. The exact position is additionally highlighted
8179
* with {@link #POS_MARKER_LEFT} and {@link #POS_MARKER_RIGHT}.
82-
*
80+
*
8381
* @param record the record currently being parsed
8482
* @param pos the position at which the error occurred
8583
* @return a text snippet.
8684
*/
8785
private static String getErrorSnippet(final String record, final int pos) {
8886
final StringBuilder snippet = new StringBuilder();
89-
87+
9088
final int start = pos - SNIPPET_SIZE / 2;
9189
if (start < 0) {
9290
snippet.append(record.substring(0, pos));
9391
} else {
9492
snippet.append(SNIPPET_ELLIPSIS);
9593
snippet.append(record.substring(start, pos));
9694
}
97-
95+
9896
snippet.append(POS_MARKER_LEFT);
9997
snippet.append(record.charAt(pos));
10098
snippet.append(POS_MARKER_RIGHT);
101-
99+
102100
if (pos + 1 < record.length()) {
103101
final int end = pos + SNIPPET_SIZE / 2;
104102
if (end > record.length()) {
@@ -108,7 +106,7 @@ private static String getErrorSnippet(final String record, final int pos) {
108106
snippet.append(SNIPPET_ELLIPSIS);
109107
}
110108
}
111-
109+
112110
return snippet.toString();
113111
}
114112

0 commit comments

Comments
 (0)