Skip to content

Commit 5a781f2

Browse files
lili
authored and committed
PicaEncoder implemented
The PicaEncoder encodes a "record-field-subfield" stream into pica+ format. If the input stream was produced by PicaDecoder with its default settings and contains S-subfields, the encoder is not able to restore them to their original form, because data is lost during decoding. In this case, the decoder should be run with the option appendControlSubField="false" to keep the original data.
1 parent abde57d commit 5a781f2

File tree

4 files changed

+244
-42
lines changed

4 files changed

+244
-42
lines changed

.classpath

Lines changed: 42 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,42 @@
1-
<?xml version="1.0" encoding="UTF-8"?>
2-
<classpath>
3-
<classpathentry kind="src" output="target/classes" path="src/main/java">
4-
<attributes>
5-
<attribute name="optional" value="true"/>
6-
<attribute name="maven.pomderived" value="true"/>
7-
</attributes>
8-
</classpathentry>
9-
<classpathentry kind="src" output="target/classes" path="src/main/antlr3">
10-
<attributes>
11-
<attribute name="optional" value="true"/>
12-
<attribute name="maven.pomderived" value="true"/>
13-
</attributes>
14-
</classpathentry>
15-
<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
16-
<attributes>
17-
<attribute name="maven.pomderived" value="true"/>
18-
</attributes>
19-
</classpathentry>
20-
<classpathentry including="**/*.java" kind="src" output="target/test-classes" path="src/test/java">
21-
<attributes>
22-
<attribute name="optional" value="true"/>
23-
<attribute name="maven.pomderived" value="true"/>
24-
</attributes>
25-
</classpathentry>
26-
<classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources">
27-
<attributes>
28-
<attribute name="maven.pomderived" value="true"/>
29-
</attributes>
30-
</classpathentry>
31-
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6">
32-
<attributes>
33-
<attribute name="maven.pomderived" value="true"/>
34-
</attributes>
35-
</classpathentry>
36-
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
37-
<attributes>
38-
<attribute name="maven.pomderived" value="true"/>
39-
</attributes>
40-
</classpathentry>
41-
<classpathentry kind="output" path="target/classes"/>
42-
</classpath>
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<classpath>
3+
<classpathentry kind="src" output="target/classes" path="src/main/java">
4+
<attributes>
5+
<attribute name="optional" value="true"/>
6+
<attribute name="maven.pomderived" value="true"/>
7+
</attributes>
8+
</classpathentry>
9+
<classpathentry kind="src" output="target/classes" path="src/main/antlr3">
10+
<attributes>
11+
<attribute name="optional" value="true"/>
12+
<attribute name="maven.pomderived" value="true"/>
13+
</attributes>
14+
</classpathentry>
15+
<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
16+
<attributes>
17+
<attribute name="maven.pomderived" value="true"/>
18+
</attributes>
19+
</classpathentry>
20+
<classpathentry including="**/*.java" kind="src" output="target/test-classes" path="src/test/java">
21+
<attributes>
22+
<attribute name="optional" value="true"/>
23+
<attribute name="maven.pomderived" value="true"/>
24+
</attributes>
25+
</classpathentry>
26+
<classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources">
27+
<attributes>
28+
<attribute name="maven.pomderived" value="true"/>
29+
</attributes>
30+
</classpathentry>
31+
<classpathentry exported="true" kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6">
32+
<attributes>
33+
<attribute name="maven.pomderived" value="true"/>
34+
</attributes>
35+
</classpathentry>
36+
<classpathentry exported="true" kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
37+
<attributes>
38+
<attribute name="maven.pomderived" value="true"/>
39+
</attributes>
40+
</classpathentry>
41+
<classpathentry kind="output" path="target/classes"/>
42+
</classpath>
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
/*
2+
* Copyright 2013 Deutsche Nationalbibliothek
3+
*
4+
* Licensed under the Apache License, Version 2.0 the "License";
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.culturegraph.mf.stream.converter.bib;
17+
18+
import java.text.Normalizer;
19+
import java.text.Normalizer.Form;
20+
21+
import org.culturegraph.mf.framework.DefaultStreamPipe;
22+
23+
import org.culturegraph.mf.framework.DefaultStreamPipe;
24+
import org.culturegraph.mf.framework.ObjectReceiver;
25+
import org.culturegraph.mf.framework.StreamReceiver;
26+
import org.culturegraph.mf.framework.annotations.Description;
27+
import org.culturegraph.mf.framework.annotations.In;
28+
import org.culturegraph.mf.framework.annotations.Out;
29+
30+
31+
/**
32+
* Encodes an event stream in pica+ format.
33+
*
34+
* @see PicaEncoder
35+
*
36+
* @author Markus Michael Geipel, Christoph Böhme, Yining Li
37+
*
38+
*/
39+
@Description("Encodes a stream in pica+ Format")
40+
@In(StreamReceiver.class)
41+
@Out(String.class)
42+
public class PicaEncoder extends DefaultStreamPipe<ObjectReceiver<String>> {
43+
44+
private static final String FIELD_DELIMITER = "\u001e";
45+
private static final String SUB_DELIMITER = "\u001f";
46+
private static boolean idnControlSubField = false;
47+
private StringBuilder builder = new StringBuilder();
48+
private String idn="";
49+
50+
/**
51+
* For each field in the stream the method calls:
52+
* <ol>
53+
* <li>receiver.startEntity</li>
54+
* <li>receiver.literal for each subfield of the field</li>
55+
* <li>receiver.endEntity</li>
56+
* </ol>
57+
* Fields without any subfield will be skipped.<br>
58+
* <strong>Special handling of subfield 'S':</strong> the code of
59+
* "control subfields" (subfield name='S') will be appended to the
60+
* fieldName. E.g.: 041A $Saxx would be mapped to the fieldName 041Aa,
61+
* and xx will be ignored. A recovery of such field to original is not implemented.
62+
* So the encoder cannot identify a S-field. The S-field special processing
63+
* can be turn of if the call of decode with the option:
64+
* (appendcontrolsubfield="false")
65+
* which default is set to true.
66+
*
67+
* @param record
68+
*/
69+
@Override
70+
public final void startRecord(final String name) {
71+
// the name is a idn, which should be found in the encoded data under 003@.
72+
this.idn = name;
73+
}
74+
75+
public final boolean compareIdFromRecord(final String gndId) {
76+
if (this.idn.equals(gndId)) {
77+
idnControlSubField = false; //only test this context.
78+
return true;
79+
}
80+
throw new MissingIdException(gndId);
81+
}
82+
83+
84+
@Override
85+
public final void startEntity(final String name) {
86+
// Here begins a field (i.e. "028A ", which is given in the name.
87+
// It is unknown, whether there are any subfields in the field.
88+
builder.append(name.trim()+ " ");
89+
if (name.trim().equals("003@")) {
90+
//Time to check nid
91+
idnControlSubField = true;
92+
}else {
93+
//No check is necessary.
94+
idnControlSubField = false;
95+
}
96+
}
97+
98+
@Override
99+
public final void literal(final String name, final String value) {
100+
//
101+
final String value_new = Normalizer.normalize(value, Form.NFD);
102+
if (idnControlSubField == true){
103+
// it is a 003@ field, the same nid delivered with record should follow
104+
if (compareIdFromRecord(value)) idnControlSubField = false;
105+
}
106+
builder.append(SUB_DELIMITER);
107+
builder.append(name);
108+
builder.append(value_new);
109+
}
110+
111+
@Override
112+
public final void endEntity() {
113+
builder.append(FIELD_DELIMITER);
114+
}
115+
116+
@Override
117+
public final void endRecord() {
118+
getReceiver().process(builder.toString());
119+
builder = new StringBuilder();
120+
}
121+
122+
}

src/main/resources/flux-commands.properties

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ encode-literals org.culturegraph.mf.stream.converter.StreamLiteralFormater
4444
encode-cgentity org.culturegraph.mf.stream.converter.CGEntityEncoder
4545
encode-formeta org.culturegraph.mf.stream.converter.FormetaEncoder
4646
encode-json org.culturegraph.mf.stream.converter.JsonEncoder
47+
encode-pica org.culturegraph.mf.stream.converter.bib.PicaEncoder
4748

4849
write org.culturegraph.mf.stream.sink.ObjectWriter
4950
write-triples org.culturegraph.mf.stream.sink.TripleWriter
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/*
2+
* Copyright 2013 Deutsche Nationalbibliothek
3+
*
4+
* Licensed under the Apache License, Version 2.0 the "License";
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.culturegraph.mf.stream.converter.bib;
17+
18+
import static org.mockito.Mockito.verify;
19+
20+
import org.culturegraph.mf.framework.ObjectReceiver;
21+
import org.junit.After;
22+
import org.junit.Before;
23+
import org.junit.Test;
24+
import org.mockito.Mock;
25+
import org.mockito.MockitoAnnotations;
26+
27+
/**
28+
* Tests for {@link PicaEncoder}
29+
*
30+
* @author li
31+
*
32+
*/
33+
public final class PicaEncoderTest {
34+
35+
private PicaEncoder picaEncoder;
36+
37+
@Mock
38+
private ObjectReceiver<String> receiver;
39+
40+
@Before
41+
public void setup() {
42+
MockitoAnnotations.initMocks(this);
43+
picaEncoder = new PicaEncoder();
44+
picaEncoder.setReceiver(receiver);
45+
}
46+
47+
@After
48+
public void cleanup() {
49+
picaEncoder.closeStream();
50+
}
51+
52+
@Test
53+
public void testShouldWriteFieldAndSubfield() {
54+
picaEncoder.startRecord("17709958X");
55+
picaEncoder.startEntity("003@");
56+
picaEncoder.literal("0", "17709958X");
57+
picaEncoder.endEntity();
58+
picaEncoder.startEntity("028@");
59+
picaEncoder.literal("P", "Abläöübolo");
60+
picaEncoder.literal("n", "VIX");
61+
picaEncoder.literal("l", "Bapst");
62+
picaEncoder.endEntity();
63+
picaEncoder.endRecord();
64+
65+
verify(receiver).process("003@ \u001f017709958X\u001e028@ \u001fPAbla\u0308o\u0308u\u0308bolo\u001fnVIX\u001flBapst\u001e");
66+
}
67+
68+
// @Test
69+
// public void testShouldPrefer007KOverRecordId() {
70+
// picaEncoder.startRecord("10");
71+
// picaEncoder.startEntity("007");
72+
// picaEncoder.literal("K", "11");
73+
// picaEncoder.endEntity();
74+
// picaEncoder.endRecord();
75+
//
76+
// verify(receiver).process("007K11");
77+
// }
78+
79+
}

0 commit comments

Comments
 (0)