Skip to content

Commit 72e8e91

Browse files
committed
Added module PicaItemSplitter.
1 parent d8d0b3c commit 72e8e91

File tree

3 files changed

+236
-0
lines changed

3 files changed

+236
-0
lines changed
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
/*
2+
* Copyright 2013 Deutsche Nationalbibliothek
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.culturegraph.mf.stream.pipe.bib;
17+
18+
import org.culturegraph.mf.framework.DefaultStreamPipe;
19+
import org.culturegraph.mf.framework.StreamReceiver;
20+
import org.culturegraph.mf.framework.annotations.Description;
21+
import org.culturegraph.mf.framework.annotations.In;
22+
import org.culturegraph.mf.framework.annotations.Out;
23+
24+
/**
25+
* Extract items from PICA records.
26+
*
27+
* @author Christoph Böhme
28+
*
29+
*/
30+
@Description("Extract items from PICA records.")
31+
@In(StreamReceiver.class)
32+
@Out(StreamReceiver.class)
33+
public final class PicaItemSplitter extends DefaultStreamPipe<StreamReceiver> {
34+
35+
private static final char SUFFIX_SEPARATOR = '/';
36+
37+
private String currentSuffix;
38+
private String identifier;
39+
40+
@Override
41+
public void startRecord(final String identifier) {
42+
assert !isClosed();
43+
this.currentSuffix = null;
44+
this.identifier = identifier;
45+
getReceiver().startRecord(identifier);
46+
}
47+
48+
@Override
49+
public void endRecord() {
50+
assert !isClosed();
51+
getReceiver().endRecord();
52+
}
53+
54+
@Override
55+
public void startEntity(final String name) {
56+
assert !isClosed();
57+
int suffixStart = name.lastIndexOf(SUFFIX_SEPARATOR);
58+
if (suffixStart == -1) {
59+
suffixStart = name.length();
60+
}
61+
final String suffix = name.substring(suffixStart);
62+
if (currentSuffix != null) {
63+
if (!currentSuffix.equals(suffix)) {
64+
getReceiver().endRecord();
65+
getReceiver().startRecord(identifier);
66+
}
67+
}
68+
currentSuffix = suffix;
69+
getReceiver().startEntity(name.substring(0, suffixStart));
70+
}
71+
72+
@Override
73+
public void endEntity() {
74+
assert !isClosed();
75+
getReceiver().endEntity();
76+
}
77+
78+
@Override
79+
public void literal(final String name, final String value) {
80+
assert !isClosed();
81+
getReceiver().literal(name, value);
82+
}
83+
84+
}

src/main/resources/flux-commands.properties

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,3 +84,4 @@ normalize-utf8 org.culturegraph.mf.stream.pipe.Utf8Normalizer
8484
morph org.culturegraph.mf.morph.Metamorph
8585
filter org.culturegraph.mf.stream.pipe.Filter
8686
add-oreaggregation org.culturegraph.mf.stream.pipe.OreAggregationAdder
87+
split-pica-items org.culturegraph.mf.stream.pipe.bib.PicaItemSplitter
Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
/*
2+
* Copyright 2013 Deutsche Nationalbibliothek
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.culturegraph.mf.stream.pipe.bib;
17+
18+
import static org.mockito.Mockito.inOrder;
19+
20+
import org.culturegraph.mf.framework.StreamReceiver;
21+
import org.junit.After;
22+
import org.junit.Before;
23+
import org.junit.Test;
24+
import org.mockito.InOrder;
25+
import org.mockito.Mock;
26+
import org.mockito.MockitoAnnotations;
27+
28+
29+
/**
30+
* Tests {@link PicaItemSplitter}.
31+
*
32+
* @author Christoph Böhme
33+
*
34+
*/
35+
public final class PicaItemSplitterTest {
36+
37+
private static final String RECORD_ID = "1";
38+
private static final String ENTITY = "001@";
39+
private static final String LITERAL1 = "a";
40+
private static final String LITERAL2 = "b";
41+
private static final String VALUE = "val";
42+
private static final String ENTITY_WITH_SUFFIX1 = "002/01";
43+
private static final String ENTITY_WITH_SUFFIX2 = "002/02";
44+
private static final String ENTITY_WITH_SUFFIX_STRIPPED = "002";
45+
46+
private PicaItemSplitter picaItemSplitter;
47+
48+
@Mock
49+
private StreamReceiver receiver;
50+
51+
@Before
52+
public void setup() {
53+
MockitoAnnotations.initMocks(this);
54+
picaItemSplitter = new PicaItemSplitter();
55+
picaItemSplitter.setReceiver(receiver);
56+
}
57+
58+
@After
59+
public void cleanup() {
60+
picaItemSplitter.closeStream();
61+
}
62+
63+
@Test
64+
public void testShouldSplitAtFirstEntityWithSuffix() {
65+
picaItemSplitter.startRecord(RECORD_ID);
66+
picaItemSplitter.startEntity(ENTITY);
67+
picaItemSplitter.literal(LITERAL1, VALUE);
68+
picaItemSplitter.endEntity();
69+
picaItemSplitter.startEntity(ENTITY_WITH_SUFFIX1);
70+
picaItemSplitter.literal(LITERAL2, VALUE);
71+
picaItemSplitter.endEntity();
72+
picaItemSplitter.endRecord();
73+
74+
final InOrder ordered = inOrder(receiver);
75+
ordered.verify(receiver).startRecord(RECORD_ID);
76+
ordered.verify(receiver).startEntity(ENTITY);
77+
ordered.verify(receiver).literal(LITERAL1, VALUE);
78+
ordered.verify(receiver).endEntity();
79+
ordered.verify(receiver).endRecord();
80+
ordered.verify(receiver).startRecord(RECORD_ID);
81+
ordered.verify(receiver).startEntity(ENTITY_WITH_SUFFIX_STRIPPED);
82+
ordered.verify(receiver).literal(LITERAL2, VALUE);
83+
ordered.verify(receiver).endEntity();
84+
ordered.verify(receiver).endRecord();
85+
}
86+
87+
@Test
88+
public void testShouldNotSplitIfTheFirstEntityHasASuffix() {
89+
picaItemSplitter.startRecord(RECORD_ID);
90+
picaItemSplitter.startEntity(ENTITY_WITH_SUFFIX1);
91+
picaItemSplitter.literal(LITERAL1, VALUE);
92+
picaItemSplitter.endEntity();
93+
picaItemSplitter.endRecord();
94+
95+
final InOrder ordered = inOrder(receiver);
96+
ordered.verify(receiver).startRecord(RECORD_ID);
97+
ordered.verify(receiver).startEntity(ENTITY_WITH_SUFFIX_STRIPPED);
98+
ordered.verify(receiver).literal(LITERAL1, VALUE);
99+
ordered.verify(receiver).endEntity();
100+
ordered.verify(receiver).endRecord();
101+
}
102+
103+
@Test
104+
public void testShouldSplitAtFirstEntityWithoutSuffix() {
105+
picaItemSplitter.startRecord(RECORD_ID);
106+
picaItemSplitter.startEntity(ENTITY_WITH_SUFFIX1);
107+
picaItemSplitter.literal(LITERAL2, VALUE);
108+
picaItemSplitter.endEntity();
109+
picaItemSplitter.startEntity(ENTITY);
110+
picaItemSplitter.literal(LITERAL1, VALUE);
111+
picaItemSplitter.endEntity();
112+
picaItemSplitter.endRecord();
113+
114+
final InOrder ordered = inOrder(receiver);
115+
ordered.verify(receiver).startRecord(RECORD_ID);
116+
ordered.verify(receiver).startEntity(ENTITY_WITH_SUFFIX_STRIPPED);
117+
ordered.verify(receiver).literal(LITERAL2, VALUE);
118+
ordered.verify(receiver).endEntity();
119+
ordered.verify(receiver).endRecord();
120+
ordered.verify(receiver).startRecord(RECORD_ID);
121+
ordered.verify(receiver).startEntity(ENTITY);
122+
ordered.verify(receiver).literal(LITERAL1, VALUE);
123+
ordered.verify(receiver).endEntity();
124+
ordered.verify(receiver).endRecord();
125+
}
126+
127+
@Test
128+
public void testShouldSplitWhenSuffixChanges() {
129+
picaItemSplitter.startRecord(RECORD_ID);
130+
picaItemSplitter.startEntity(ENTITY_WITH_SUFFIX1);
131+
picaItemSplitter.literal(LITERAL1, VALUE);
132+
picaItemSplitter.endEntity();
133+
picaItemSplitter.startEntity(ENTITY_WITH_SUFFIX2);
134+
picaItemSplitter.literal(LITERAL2, VALUE);
135+
picaItemSplitter.endEntity();
136+
picaItemSplitter.endRecord();
137+
138+
final InOrder ordered = inOrder(receiver);
139+
ordered.verify(receiver).startRecord(RECORD_ID);
140+
ordered.verify(receiver).startEntity(ENTITY_WITH_SUFFIX_STRIPPED);
141+
ordered.verify(receiver).literal(LITERAL1, VALUE);
142+
ordered.verify(receiver).endEntity();
143+
ordered.verify(receiver).endRecord();
144+
ordered.verify(receiver).startRecord(RECORD_ID);
145+
ordered.verify(receiver).startEntity(ENTITY_WITH_SUFFIX_STRIPPED);
146+
ordered.verify(receiver).literal(LITERAL2, VALUE);
147+
ordered.verify(receiver).endEntity();
148+
ordered.verify(receiver).endRecord();
149+
}
150+
151+
}

0 commit comments

Comments
 (0)