Skip to content

Commit 5757cbb

Browse files
author
mgeipel
committed
fixed #5
added asserts to the majority of classes in package mf.stream.
1 parent 868eb7e commit 5757cbb

38 files changed

+2613
-2607
lines changed

src/main/java/org/culturegraph/mf/stream/converter/CGTextDecoder.java

Lines changed: 90 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -13,92 +13,93 @@
1313
* See the License for the specific language governing permissions and
1414
* limitations under the License.
1515
*/
16-
package org.culturegraph.mf.stream.converter;
17-
18-
import java.util.regex.Matcher;
19-
import java.util.regex.Pattern;
20-
21-
import org.culturegraph.mf.exceptions.FormatException;
22-
import org.culturegraph.mf.framework.DefaultObjectPipe;
23-
import org.culturegraph.mf.framework.StreamReceiver;
24-
import org.culturegraph.mf.framework.annotations.Description;
25-
import org.culturegraph.mf.framework.annotations.In;
26-
import org.culturegraph.mf.framework.annotations.Out;
27-
28-
29-
/**
30-
* Decodes a record stored in CG-Text format.
31-
*
32-
* @see CGTextEncoder
33-
*
34-
* @author Christoph Böhme
35-
*
36-
*/
37-
@Description("Decodes a record stored in CG-Text format.")
38-
@In(String.class)
39-
@Out(StreamReceiver.class)
40-
public final class CGTextDecoder
41-
extends DefaultObjectPipe<String, StreamReceiver> {
42-
43-
private static final String UNQUOTED_NAME = "(?:[A-Za-z0-9-_.:]+)";
44-
private static final String QUOTED_NAME = "(?:'(?:\\\\'|[^'])*')";
45-
private static final String NAME = "(" + UNQUOTED_NAME + "|" + QUOTED_NAME + ")";
46-
private static final String GROUP_START = "(?:\\{)";
47-
private static final String GROUP_END = "(?:\\})";
48-
private static final String CONTENT = "(?:(.*))";
49-
private static final String LEADING_WS = "(?:(?:\\A|\\G)\\s*)";
50-
private static final String TRAILING_WS = "(?:\\s*$)";
51-
private static final String ASSIGNMENT = "(?:\\s*=\\s*)";
52-
private static final String LIST_SEP = "(?:(?:\\s*,\\s*)|(?=\\s*\\})|" + TRAILING_WS + ")";
53-
54-
private static final Pattern RECORD = Pattern.compile(
55-
LEADING_WS + NAME + ASSIGNMENT + GROUP_START + CONTENT + GROUP_END + TRAILING_WS);
56-
private static final Pattern ENTITY_START = Pattern.compile(
57-
LEADING_WS + NAME + ASSIGNMENT + GROUP_START);
58-
private static final Pattern ENTITY_END = Pattern.compile(
59-
LEADING_WS + GROUP_END + LIST_SEP);
60-
private static final Pattern LITERAL = Pattern.compile(
61-
LEADING_WS + NAME + ASSIGNMENT + NAME + LIST_SEP);
62-
63-
@Override
64-
public void process(final String str) {
65-
final Matcher record = RECORD.matcher(str);
66-
if (!record.matches()) {
67-
throw new FormatException("expecting only a single record");
68-
}
69-
final String id = unescape(record.group(1));
70-
final String contents = record.group(2);
71-
getReceiver().startRecord(id);
72-
processList(contents);
73-
getReceiver().endRecord();
74-
}
75-
76-
private void processList(final String str) {
77-
final Matcher literal = LITERAL.matcher(str);
78-
final Matcher entityStart = ENTITY_START.matcher(str);
79-
final Matcher entityEnd = ENTITY_END.matcher(str);
80-
int pos = 0;
81-
while (pos < str.length()) {
82-
if (literal.find(pos)) {
83-
final String name = unescape(literal.group(1));
84-
final String value = unescape(literal.group(2));
85-
getReceiver().literal(name, value);
86-
pos = literal.end();
87-
} else if (entityStart.find(pos)) {
88-
final String name = unescape(entityStart.group(1));
89-
getReceiver().startEntity(name);
90-
pos = entityStart.end();
91-
} else if (entityEnd.find(pos)) {
92-
getReceiver().endEntity();
93-
pos = entityEnd.end();
94-
} else {
95-
throw new FormatException("unexpected format at position: " + pos);
96-
}
97-
}
98-
}
99-
100-
private String unescape(final String str) {
101-
return str.replaceAll("(^')|('$)", "").replace("\\'", "'").replace("\\\\", "\\");
102-
}
103-
104-
}
16+
package org.culturegraph.mf.stream.converter;
17+
18+
import java.util.regex.Matcher;
19+
import java.util.regex.Pattern;
20+
21+
import org.culturegraph.mf.exceptions.FormatException;
22+
import org.culturegraph.mf.framework.DefaultObjectPipe;
23+
import org.culturegraph.mf.framework.StreamReceiver;
24+
import org.culturegraph.mf.framework.annotations.Description;
25+
import org.culturegraph.mf.framework.annotations.In;
26+
import org.culturegraph.mf.framework.annotations.Out;
27+
28+
29+
/**
30+
* Decodes a record stored in CG-Text format.
31+
*
32+
* @see CGTextEncoder
33+
*
34+
* @author Christoph Böhme
35+
*
36+
*/
37+
@Description("Decodes a record stored in CG-Text format.")
38+
@In(String.class)
39+
@Out(StreamReceiver.class)
40+
public final class CGTextDecoder
41+
extends DefaultObjectPipe<String, StreamReceiver> {
42+
43+
private static final String UNQUOTED_NAME = "(?:[A-Za-z0-9-_.:]+)";
44+
private static final String QUOTED_NAME = "(?:'(?:\\\\'|[^'])*')";
45+
private static final String NAME = "(" + UNQUOTED_NAME + "|" + QUOTED_NAME + ")";
46+
private static final String GROUP_START = "(?:\\{)";
47+
private static final String GROUP_END = "(?:\\})";
48+
private static final String CONTENT = "(?:(.*))";
49+
private static final String LEADING_WS = "(?:(?:\\A|\\G)\\s*)";
50+
private static final String TRAILING_WS = "(?:\\s*$)";
51+
private static final String ASSIGNMENT = "(?:\\s*=\\s*)";
52+
private static final String LIST_SEP = "(?:(?:\\s*,\\s*)|(?=\\s*\\})|" + TRAILING_WS + ")";
53+
54+
private static final Pattern RECORD = Pattern.compile(
55+
LEADING_WS + NAME + ASSIGNMENT + GROUP_START + CONTENT + GROUP_END + TRAILING_WS);
56+
private static final Pattern ENTITY_START = Pattern.compile(
57+
LEADING_WS + NAME + ASSIGNMENT + GROUP_START);
58+
private static final Pattern ENTITY_END = Pattern.compile(
59+
LEADING_WS + GROUP_END + LIST_SEP);
60+
private static final Pattern LITERAL = Pattern.compile(
61+
LEADING_WS + NAME + ASSIGNMENT + NAME + LIST_SEP);
62+
63+
@Override
64+
public void process(final String str) {
65+
assert !isClosed();
66+
final Matcher record = RECORD.matcher(str);
67+
if (!record.matches()) {
68+
throw new FormatException("expecting only a single record");
69+
}
70+
final String id = unescape(record.group(1));
71+
final String contents = record.group(2);
72+
getReceiver().startRecord(id);
73+
processList(contents);
74+
getReceiver().endRecord();
75+
}
76+
77+
private void processList(final String str) {
78+
final Matcher literal = LITERAL.matcher(str);
79+
final Matcher entityStart = ENTITY_START.matcher(str);
80+
final Matcher entityEnd = ENTITY_END.matcher(str);
81+
int pos = 0;
82+
while (pos < str.length()) {
83+
if (literal.find(pos)) {
84+
final String name = unescape(literal.group(1));
85+
final String value = unescape(literal.group(2));
86+
getReceiver().literal(name, value);
87+
pos = literal.end();
88+
} else if (entityStart.find(pos)) {
89+
final String name = unescape(entityStart.group(1));
90+
getReceiver().startEntity(name);
91+
pos = entityStart.end();
92+
} else if (entityEnd.find(pos)) {
93+
getReceiver().endEntity();
94+
pos = entityEnd.end();
95+
} else {
96+
throw new FormatException("unexpected format at position: " + pos);
97+
}
98+
}
99+
}
100+
101+
private String unescape(final String str) {
102+
return str.replaceAll("(^')|('$)", "").replace("\\'", "'").replace("\\\\", "\\");
103+
}
104+
105+
}

src/main/java/org/culturegraph/mf/stream/converter/CGTextEncoder.java

Lines changed: 87 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -13,85 +13,90 @@
1313
* See the License for the specific language governing permissions and
1414
* limitations under the License.
1515
*/
16-
package org.culturegraph.mf.stream.converter;
17-
18-
import org.culturegraph.mf.framework.DefaultStreamPipe;
19-
import org.culturegraph.mf.framework.ObjectReceiver;
20-
import org.culturegraph.mf.framework.StreamReceiver;
21-
import org.culturegraph.mf.framework.annotations.Description;
22-
import org.culturegraph.mf.framework.annotations.In;
23-
import org.culturegraph.mf.framework.annotations.Out;
24-
25-
/**
26-
* Serialise a record in CG-Text format.
27-
*
28-
* @see CGTextDecoder
29-
*
30-
* @author Christoph Böhme
31-
*
32-
*/
33-
@Description("Serialise a record in CG-Text format.")
34-
@In(StreamReceiver.class)
35-
@Out(String.class)
36-
public final class CGTextEncoder extends DefaultStreamPipe<ObjectReceiver<String>> {
37-
38-
private static final String START_GROUP = "={";
39-
private static final String END_GROUP = "}";
40-
private static final String SET_LITERAL = "=";
41-
private static final String LIST_SEP = ", ";
42-
43-
private final StringBuilder builder = new StringBuilder();
44-
45-
private String listSep;
46-
47-
@Override
48-
public void startRecord(final String identifier) {
49-
builder.delete(0, builder.length());
50-
builder.append(escape(identifier));
51-
builder.append(START_GROUP);
52-
listSep = "";
53-
}
54-
55-
@Override
56-
public void endRecord() {
57-
builder.append(END_GROUP);
58-
getReceiver().process(builder.toString());
59-
}
60-
61-
@Override
62-
public void startEntity(final String name) {
63-
builder.append(listSep);
64-
builder.append(escape(name));
65-
builder.append(START_GROUP);
66-
listSep = "";
67-
}
68-
69-
@Override
70-
public void endEntity() {
71-
builder.append(END_GROUP);
72-
listSep = LIST_SEP;
73-
}
74-
75-
@Override
76-
public void literal(final String name, final String value) {
77-
builder.append(listSep);
78-
builder.append(escape(name));
79-
builder.append(SET_LITERAL);
80-
builder.append(escape(value));
81-
listSep = LIST_SEP;
82-
}
83-
84-
@Override
85-
protected void onResetStream() {
86-
builder.delete(0, builder.length());
87-
listSep = "";
88-
}
89-
90-
private String escape(final String str) {
91-
if (str.matches("^(?:[A-Za-z0-9-_.:]+)$")) {
92-
return str;
93-
}
94-
return "'" + str.replace("\\", "\\\\").replace("'", "\\'") + "'";
95-
}
96-
97-
}
16+
package org.culturegraph.mf.stream.converter;
17+
18+
import org.culturegraph.mf.framework.DefaultStreamPipe;
19+
import org.culturegraph.mf.framework.ObjectReceiver;
20+
import org.culturegraph.mf.framework.StreamReceiver;
21+
import org.culturegraph.mf.framework.annotations.Description;
22+
import org.culturegraph.mf.framework.annotations.In;
23+
import org.culturegraph.mf.framework.annotations.Out;
24+
25+
/**
26+
* Serialise a record in CG-Text format.
27+
*
28+
* @see CGTextDecoder
29+
*
30+
* @author Christoph Böhme
31+
*
32+
*/
33+
@Description("Serialise a record in CG-Text format.")
34+
@In(StreamReceiver.class)
35+
@Out(String.class)
36+
public final class CGTextEncoder extends DefaultStreamPipe<ObjectReceiver<String>> {
37+
38+
private static final String START_GROUP = "={";
39+
private static final String END_GROUP = "}";
40+
private static final String SET_LITERAL = "=";
41+
private static final String LIST_SEP = ", ";
42+
43+
private final StringBuilder builder = new StringBuilder();
44+
45+
private String listSep;
46+
47+
@Override
48+
public void startRecord(final String identifier) {
49+
assert !isClosed();
50+
builder.delete(0, builder.length());
51+
builder.append(escape(identifier));
52+
builder.append(START_GROUP);
53+
listSep = "";
54+
}
55+
56+
@Override
57+
public void endRecord() {
58+
assert !isClosed();
59+
builder.append(END_GROUP);
60+
getReceiver().process(builder.toString());
61+
}
62+
63+
@Override
64+
public void startEntity(final String name) {
65+
assert !isClosed();
66+
builder.append(listSep);
67+
builder.append(escape(name));
68+
builder.append(START_GROUP);
69+
listSep = "";
70+
}
71+
72+
@Override
73+
public void endEntity() {
74+
assert !isClosed();
75+
builder.append(END_GROUP);
76+
listSep = LIST_SEP;
77+
}
78+
79+
@Override
80+
public void literal(final String name, final String value) {
81+
assert !isClosed();
82+
builder.append(listSep);
83+
builder.append(escape(name));
84+
builder.append(SET_LITERAL);
85+
builder.append(escape(value));
86+
listSep = LIST_SEP;
87+
}
88+
89+
@Override
90+
protected void onResetStream() {
91+
builder.delete(0, builder.length());
92+
listSep = "";
93+
}
94+
95+
private String escape(final String str) {
96+
if (str.matches("^(?:[A-Za-z0-9-_.:]+)$")) {
97+
return str;
98+
}
99+
return "'" + str.replace("\\", "\\\\").replace("'", "\\'") + "'";
100+
}
101+
102+
}

0 commit comments

Comments
 (0)