Skip to content

Commit efc1452

Browse files
committed
Fixed #33
1 parent add61d8 commit efc1452

File tree

2 files changed

+53
-19
lines changed

2 files changed

+53
-19
lines changed

src/main/java/org/culturegraph/mf/stream/converter/RegexDecoder.java

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@
7676
* </blockquote>
7777
* </p>
7878
*
79-
* @author Thomas Seidel
79+
* @author Thomas Seidel, Christoph Böhme
8080
*
8181
*/
8282
@Description("Decodes an incoming string based on a regular expression using named-capturing groups")
@@ -122,25 +122,32 @@ public void setDefaultLiteralName(final String defaultLiteralName) {
122122
}
123123

124124
@Override
125-
public void process(final String string) {
125+
public void process(final String str) {
126126
assert !isClosed();
127-
matcher.reset(string);
127+
128+
matcher.reset(str);
129+
if (!matcher.find()) {
130+
LOG.info("Ignoring non-matching input: {}", str);
131+
return;
132+
}
128133

134+
// Extract the record id:
129135
final String id;
130136
final int groupIndex = captureGroupNames.indexOf(ID_CAPTURE_GROUP) + 1;
131-
if (groupIndex > 0 && matcher.find()) {
137+
if (groupIndex > 0) {
132138
id = matcher.group(groupIndex);
133139
} else {
134140
id = "";
135141
}
136142
getReceiver().startRecord(id);
137143

144+
// Add a literal containing the unmodified input string:
138145
if (defaultLiteralName != null) {
139-
getReceiver().literal(defaultLiteralName, string);
146+
getReceiver().literal(defaultLiteralName, str);
140147
}
141148

142-
matcher.reset();
143-
while (matcher.find()) {
149+
// Emit literals:
150+
do {
144151
final int groupCount = matcher.groupCount();
145152
LOG.debug("groupCount() is: {}", Integer.valueOf(groupCount));
146153
for (int group = 1; group <= groupCount; ++group) {
@@ -150,7 +157,7 @@ public void process(final String string) {
150157
Integer.valueOf(group), literalName, literalValue);
151158
getReceiver().literal(literalName, literalValue);
152159
}
153-
}
160+
} while (matcher.find());
154161

155162
getReceiver().endRecord();
156163
}

src/test/java/org/culturegraph/mf/stream/converter/RegexDecoderTest.java

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,18 @@
1515
*/
1616
package org.culturegraph.mf.stream.converter;
1717

18-
import org.culturegraph.mf.exceptions.FormatException;
19-
import org.culturegraph.mf.stream.converter.RegexDecoder;
20-
import org.culturegraph.mf.stream.sink.EventList;
21-
import org.culturegraph.mf.stream.sink.StreamValidator;
22-
import org.junit.Assert;
23-
import org.junit.Test;
18+
import static org.junit.Assert.fail;
19+
20+
import org.culturegraph.mf.exceptions.FormatException;
21+
import org.culturegraph.mf.stream.sink.EventList;
22+
import org.culturegraph.mf.stream.sink.StreamValidator;
23+
import org.junit.Test;
2424

2525

2626
/**
2727
* Tests for {@link RegexDecoder}.
2828
*
29-
* @author Thomas Seidel
29+
* @author Thomas Seidel, Christoph Böhme
3030
*
3131
*/
3232
public final class RegexDecoderTest {
@@ -62,8 +62,8 @@ public void testRegex() {
6262
try {
6363
regexDecoder.process(INPUT);
6464
regexDecoder.closeStream();
65-
} catch(FormatException e) {
66-
Assert.fail(e.toString());
65+
} catch (FormatException e) {
66+
fail(e.toString());
6767
}
6868
}
6969

@@ -85,10 +85,37 @@ public void testRecordId() {
8585
try {
8686
regexDecoder.process("RECORD-ID:28,DATA:test");
8787
regexDecoder.closeStream();
88-
} catch(FormatException e) {
89-
Assert.fail(e.toString());
88+
} catch (FormatException e) {
89+
fail(e.toString());
9090
}
9191

92+
}
93+
94+
@Test
95+
public void testIgnoreNonMatching() {
96+
final EventList expected = new EventList();
97+
98+
expected.startRecord("");
99+
expected.literal("l", "v1");
100+
expected.endRecord();
101+
expected.startRecord("");
102+
expected.literal("l", "v2");
103+
expected.endRecord();
104+
expected.closeStream();
105+
106+
final RegexDecoder regexDecoder = new RegexDecoder("^l:(?<l>.*?)$");
107+
final StreamValidator validator = new StreamValidator(expected.getEvents());
108+
109+
regexDecoder.setReceiver(validator);
110+
111+
try {
112+
regexDecoder.process("l:v1");
113+
regexDecoder.process("garbage should be ignored");
114+
regexDecoder.process("l:v2");
115+
regexDecoder.closeStream();
116+
} catch (FormatException e) {
117+
fail(e.toString());
118+
}
92119
}
93120

94121
}

0 commit comments

Comments
 (0)