Skip to content

Commit 98e5eb3

Browse files
committed
Updated PicaItemSplitter to ignore suffixed entities before the first item marker (101@) entity.
1 parent 72e8e91 commit 98e5eb3

File tree

2 files changed

+134
-63
lines changed

2 files changed

+134
-63
lines changed

src/main/java/org/culturegraph/mf/stream/pipe/bib/PicaItemSplitter.java

Lines changed: 31 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -32,15 +32,18 @@
3232
@Out(StreamReceiver.class)
3333
public final class PicaItemSplitter extends DefaultStreamPipe<StreamReceiver> {
3434

35+
private static final String ITEM_MARKER = "101@";
3536
private static final char SUFFIX_SEPARATOR = '/';
3637

3738
private String currentSuffix;
39+
private boolean inItemMarker;
3840
private String identifier;
3941

4042
@Override
4143
public void startRecord(final String identifier) {
4244
assert !isClosed();
4345
this.currentSuffix = null;
46+
this.inItemMarker = false;
4447
this.identifier = identifier;
4548
getReceiver().startRecord(identifier);
4649
}
@@ -54,31 +57,50 @@ public void endRecord() {
5457
@Override
5558
public void startEntity(final String name) {
5659
assert !isClosed();
60+
61+
if (ITEM_MARKER.equals(name)) {
62+
inItemMarker = true;
63+
currentSuffix = "";
64+
getReceiver().endRecord();
65+
getReceiver().startRecord(identifier);
66+
return;
67+
}
68+
69+
if (currentSuffix == null) {
70+
getReceiver().startEntity(name);
71+
return;
72+
}
73+
5774
int suffixStart = name.lastIndexOf(SUFFIX_SEPARATOR);
5875
if (suffixStart == -1) {
5976
suffixStart = name.length();
6077
}
6178
final String suffix = name.substring(suffixStart);
62-
if (currentSuffix != null) {
63-
if (!currentSuffix.equals(suffix)) {
64-
getReceiver().endRecord();
65-
getReceiver().startRecord(identifier);
66-
}
79+
if (!currentSuffix.equals(suffix)) {
80+
getReceiver().endRecord();
81+
getReceiver().startRecord(identifier);
6782
}
6883
currentSuffix = suffix;
6984
getReceiver().startEntity(name.substring(0, suffixStart));
7085
}
7186

7287
@Override
7388
public void endEntity() {
74-
assert !isClosed();
75-
getReceiver().endEntity();
89+
assert !isClosed();
90+
91+
if (!inItemMarker) {
92+
getReceiver().endEntity();
93+
}
94+
inItemMarker = false;
7695
}
7796

7897
@Override
7998
public void literal(final String name, final String value) {
80-
assert !isClosed();
81-
getReceiver().literal(name, value);
99+
assert !isClosed();
100+
101+
if (!inItemMarker) {
102+
getReceiver().literal(name, value);
103+
}
82104
}
83105

84106
}

src/test/java/org/culturegraph/mf/stream/pipe/bib/PicaItemSplitterTest.java

Lines changed: 103 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -35,13 +35,13 @@
3535
public final class PicaItemSplitterTest {
3636

3737
private static final String RECORD_ID = "1";
38-
private static final String ENTITY = "001@";
39-
private static final String LITERAL1 = "a";
40-
private static final String LITERAL2 = "b";
38+
private static final String ENTITY = "001B";
39+
private static final String LITERAL = "a";
4140
private static final String VALUE = "val";
42-
private static final String ENTITY_WITH_SUFFIX1 = "002/01";
43-
private static final String ENTITY_WITH_SUFFIX2 = "002/02";
44-
private static final String ENTITY_WITH_SUFFIX_STRIPPED = "002";
41+
private static final String ITEM_MARKER_ENTITY = "101@";
42+
private static final String ENTITY_WITH_SUFFIX1 = "002A/01";
43+
private static final String ENTITY_WITH_SUFFIX2 = "002A/02";
44+
private static final String ENTITY_WITH_SUFFIX_STRIPPED = "002A";
4545

4646
private PicaItemSplitter picaItemSplitter;
4747

@@ -59,93 +59,142 @@ public void setup() {
5959
public void cleanup() {
6060
picaItemSplitter.closeStream();
6161
}
62-
62+
6363
@Test
64-
public void testShouldSplitAtFirstEntityWithSuffix() {
64+
public void testShouldSplitAtItemMarkerEntities() {
6565
picaItemSplitter.startRecord(RECORD_ID);
66-
picaItemSplitter.startEntity(ENTITY);
67-
picaItemSplitter.literal(LITERAL1, VALUE);
68-
picaItemSplitter.endEntity();
69-
picaItemSplitter.startEntity(ENTITY_WITH_SUFFIX1);
70-
picaItemSplitter.literal(LITERAL2, VALUE);
71-
picaItemSplitter.endEntity();
66+
emitEntity();
67+
emitItemMarkerEntity();
68+
emitEntity();
69+
emitItemMarkerEntity();
70+
emitEntity();
7271
picaItemSplitter.endRecord();
7372

7473
final InOrder ordered = inOrder(receiver);
7574
ordered.verify(receiver).startRecord(RECORD_ID);
76-
ordered.verify(receiver).startEntity(ENTITY);
77-
ordered.verify(receiver).literal(LITERAL1, VALUE);
78-
ordered.verify(receiver).endEntity();
75+
verifyEntity(ordered);
7976
ordered.verify(receiver).endRecord();
8077
ordered.verify(receiver).startRecord(RECORD_ID);
81-
ordered.verify(receiver).startEntity(ENTITY_WITH_SUFFIX_STRIPPED);
82-
ordered.verify(receiver).literal(LITERAL2, VALUE);
83-
ordered.verify(receiver).endEntity();
78+
verifyEntity(ordered);
79+
ordered.verify(receiver).endRecord();
80+
ordered.verify(receiver).startRecord(RECORD_ID);
81+
verifyEntity(ordered);
8482
ordered.verify(receiver).endRecord();
8583
}
8684

8785
@Test
88-
public void testShouldNotSplitIfTheFirstEntityHasASuffix() {
86+
public void testShouldCreateEmptyRecordsIfNothingBeforeOrAfterItemMarkers() {
8987
picaItemSplitter.startRecord(RECORD_ID);
90-
picaItemSplitter.startEntity(ENTITY_WITH_SUFFIX1);
91-
picaItemSplitter.literal(LITERAL1, VALUE);
92-
picaItemSplitter.endEntity();
93-
picaItemSplitter.endRecord();
88+
emitItemMarkerEntity();
89+
emitItemMarkerEntity();
90+
picaItemSplitter.endRecord();
91+
9492

9593
final InOrder ordered = inOrder(receiver);
9694
ordered.verify(receiver).startRecord(RECORD_ID);
97-
ordered.verify(receiver).startEntity(ENTITY_WITH_SUFFIX_STRIPPED);
98-
ordered.verify(receiver).literal(LITERAL1, VALUE);
99-
ordered.verify(receiver).endEntity();
95+
ordered.verify(receiver).endRecord();
96+
ordered.verify(receiver).startRecord(RECORD_ID);
97+
ordered.verify(receiver).endRecord();
98+
ordered.verify(receiver).startRecord(RECORD_ID);
10099
ordered.verify(receiver).endRecord();
101100
}
102101

103102
@Test
104-
public void testShouldSplitAtFirstEntityWithoutSuffix() {
103+
public void testShouldRemoveSuffix() {
105104
picaItemSplitter.startRecord(RECORD_ID);
106-
picaItemSplitter.startEntity(ENTITY_WITH_SUFFIX1);
107-
picaItemSplitter.literal(LITERAL2, VALUE);
108-
picaItemSplitter.endEntity();
109-
picaItemSplitter.startEntity(ENTITY);
110-
picaItemSplitter.literal(LITERAL1, VALUE);
111-
picaItemSplitter.endEntity();
105+
emitItemMarkerEntity();
106+
emitSuffixedEntity1();
112107
picaItemSplitter.endRecord();
113108

114109
final InOrder ordered = inOrder(receiver);
115110
ordered.verify(receiver).startRecord(RECORD_ID);
116-
ordered.verify(receiver).startEntity(ENTITY_WITH_SUFFIX_STRIPPED);
117-
ordered.verify(receiver).literal(LITERAL2, VALUE);
118-
ordered.verify(receiver).endEntity();
119-
ordered.verify(receiver).endRecord();
111+
ordered.verify(receiver).endRecord();
120112
ordered.verify(receiver).startRecord(RECORD_ID);
121-
ordered.verify(receiver).startEntity(ENTITY);
122-
ordered.verify(receiver).literal(LITERAL1, VALUE);
123-
ordered.verify(receiver).endEntity();
113+
verifySuffixedEntityStripped(ordered);
124114
ordered.verify(receiver).endRecord();
125115
}
126116

127117
@Test
128118
public void testShouldSplitWhenSuffixChanges() {
129119
picaItemSplitter.startRecord(RECORD_ID);
130-
picaItemSplitter.startEntity(ENTITY_WITH_SUFFIX1);
131-
picaItemSplitter.literal(LITERAL1, VALUE);
132-
picaItemSplitter.endEntity();
133-
picaItemSplitter.startEntity(ENTITY_WITH_SUFFIX2);
134-
picaItemSplitter.literal(LITERAL2, VALUE);
135-
picaItemSplitter.endEntity();
120+
emitItemMarkerEntity();
121+
emitSuffixedEntity1();
122+
emitSuffixedEntity2();
136123
picaItemSplitter.endRecord();
137124

138125
final InOrder ordered = inOrder(receiver);
139126
ordered.verify(receiver).startRecord(RECORD_ID);
140-
ordered.verify(receiver).startEntity(ENTITY_WITH_SUFFIX_STRIPPED);
141-
ordered.verify(receiver).literal(LITERAL1, VALUE);
142-
ordered.verify(receiver).endEntity();
127+
ordered.verify(receiver).endRecord();
128+
ordered.verify(receiver).startRecord(RECORD_ID);
129+
verifySuffixedEntityStripped(ordered);
143130
ordered.verify(receiver).endRecord();
144131
ordered.verify(receiver).startRecord(RECORD_ID);
145-
ordered.verify(receiver).startEntity(ENTITY_WITH_SUFFIX_STRIPPED);
146-
ordered.verify(receiver).literal(LITERAL2, VALUE);
147-
ordered.verify(receiver).endEntity();
132+
verifySuffixedEntityStripped(ordered);
148133
ordered.verify(receiver).endRecord();
149134
}
135+
136+
@Test
137+
public void testShouldOnlySplitOnSuffixChangeAfterFirstItemMarkerEntity() {
138+
picaItemSplitter.startRecord(RECORD_ID);
139+
emitEntity();
140+
emitSuffixedEntity1();
141+
emitSuffixedEntity2();
142+
picaItemSplitter.endRecord();
143+
144+
final InOrder ordered = inOrder(receiver);
145+
ordered.verify(receiver).startRecord(RECORD_ID);
146+
verifyEntity(ordered);
147+
verifySuffixedEntity1(ordered);
148+
verifySuffixedEntity2(ordered);
149+
ordered.verify(receiver).endRecord();
150+
}
151+
152+
private void emitEntity() {
153+
picaItemSplitter.startEntity(ENTITY);
154+
picaItemSplitter.literal(LITERAL, VALUE);
155+
picaItemSplitter.endEntity();
156+
}
157+
158+
private void emitSuffixedEntity1() {
159+
picaItemSplitter.startEntity(ENTITY_WITH_SUFFIX1);
160+
picaItemSplitter.literal(LITERAL, VALUE);
161+
picaItemSplitter.endEntity();
162+
}
163+
164+
private void emitSuffixedEntity2() {
165+
picaItemSplitter.startEntity(ENTITY_WITH_SUFFIX2);
166+
picaItemSplitter.literal(LITERAL, VALUE);
167+
picaItemSplitter.endEntity();
168+
}
169+
170+
private void emitItemMarkerEntity() {
171+
picaItemSplitter.startEntity(ITEM_MARKER_ENTITY);
172+
picaItemSplitter.literal(LITERAL, VALUE);
173+
picaItemSplitter.endEntity();
174+
}
175+
176+
private void verifyEntity(final InOrder ordered) {
177+
ordered.verify(receiver).startEntity(ENTITY);
178+
ordered.verify(receiver).literal(LITERAL, VALUE);
179+
ordered.verify(receiver).endEntity();
180+
}
181+
182+
private void verifySuffixedEntity1(final InOrder ordered) {
183+
ordered.verify(receiver).startEntity(ENTITY_WITH_SUFFIX1);
184+
ordered.verify(receiver).literal(LITERAL, VALUE);
185+
ordered.verify(receiver).endEntity();
186+
}
187+
188+
private void verifySuffixedEntity2(final InOrder ordered) {
189+
ordered.verify(receiver).startEntity(ENTITY_WITH_SUFFIX2);
190+
ordered.verify(receiver).literal(LITERAL, VALUE);
191+
ordered.verify(receiver).endEntity();
192+
}
193+
194+
private void verifySuffixedEntityStripped(final InOrder ordered) {
195+
ordered.verify(receiver).startEntity(ENTITY_WITH_SUFFIX_STRIPPED);
196+
ordered.verify(receiver).literal(LITERAL, VALUE);
197+
ordered.verify(receiver).endEntity();
198+
}
150199

151200
}

0 commit comments

Comments
 (0)